From 4b35c00f0dc64a648f59d0829b5ccaef1b3d5199 Mon Sep 17 00:00:00 2001 From: Arne Morten Kvarving Date: Tue, 9 Apr 2019 17:37:45 +0200 Subject: [PATCH] added: support for partitioning based parallel computations automatically partition mesh and distribute in parallel. this is rather suboptimal in many ways, but still useful. --- src/ASM/ASMbase.h | 3 + src/ASM/ASMs2D.C | 15 ++ src/ASM/ASMs2D.h | 3 + src/ASM/ASMs2Dmx.C | 20 +- src/ASM/ASMs3D.C | 18 ++ src/ASM/ASMs3D.h | 3 + src/ASM/ASMs3Dmx.C | 15 +- src/ASM/DomainDecomposition.C | 279 +++++++++++++++++++++++-- src/ASM/DomainDecomposition.h | 14 ++ src/ASM/LR/ASMu2D.C | 17 ++ src/ASM/LR/ASMu2D.h | 3 + src/ASM/LR/ASMu2Dmx.C | 23 +- src/ASM/LR/ASMu3D.C | 14 ++ src/ASM/LR/ASMu3D.h | 4 + src/ASM/LR/ASMu3Dmx.C | 383 +++++++++++++++++----------------- src/ASM/SAMpatchPETSc.C | 29 ++- src/IFEM.C | 2 +- src/LinAlg/PETScMatrix.C | 53 +++-- src/SIM/SIMbase.C | 24 ++- src/SIM/SIMdummy.h | 4 +- src/SIM/SIMoutput.C | 62 +++++- src/Utility/HDF5Writer.C | 2 + 22 files changed, 733 insertions(+), 257 deletions(-) diff --git a/src/ASM/ASMbase.h b/src/ASM/ASMbase.h index f632c526..9f02d5bf 100644 --- a/src/ASM/ASMbase.h +++ b/src/ASM/ASMbase.h @@ -425,6 +425,8 @@ public: virtual void generateThreadGroups(const Integrand&, bool, bool) {} //! \brief Generates element groups for multi-threading of boundary integrals. virtual void generateThreadGroups(char, bool, bool) {} + //! \brief Generate element-groups for multi-threading based on a partition. + virtual void generateThreadGroupsFromElms(const std::vector&) {} // Methods for integration of finite element quantities. @@ -839,6 +841,7 @@ protected: IntVec myMLGE; //!< The actual Matrix of Local to Global Element numbers IntVec myMLGN; //!< The actual Matrix of Local to Global Node numbers IntMat myMNPC; //!< The actual Matrix of Nodal Point Correspondance + IntVec myElms; //!< Elements on patch - used with partitioning //! \brief Numerical integration scheme for this patch. //! \details A value in the range [1,10] means use that number of Gauss diff --git a/src/ASM/ASMs2D.C b/src/ASM/ASMs2D.C index 3f333d14..8d054698 100644 --- a/src/ASM/ASMs2D.C +++ b/src/ASM/ASMs2D.C @@ -2318,6 +2318,10 @@ bool ASMs2D::integrate (Integrand& integrand, int lIndex, fe.iel = abs(MLGE[doXelms+iel-1]); if (fe.iel < 1) continue; // zero-area element + if (!myElms.empty() && !glInt.threadSafe() && + std::find(myElms.begin(), myElms.end(), iel-1) == myElms.end()) + continue; + #ifdef SP_DEBUG if (dbgElm < 0 && iel != -dbgElm) continue; // Skipping all elements, except for -dbgElm @@ -3119,3 +3123,14 @@ void ASMs2D::getBoundaryElms (int lIndex, int, IntVec& elms) const elms.push_back(MLGE[i + (lIndex-3)*N1m*(N2m-1)] - 1); } } + + +void ASMs2D::generateThreadGroupsFromElms(const std::vector& elms) +{ + myElms.clear(); + for (int elm : elms) + if (this->getElmIndex(elm+1) > 0) + myElms.push_back(this->getElmIndex(elm+1) - 1); + + threadGroups = threadGroups.filter(myElms); +} diff --git a/src/ASM/ASMs2D.h b/src/ASM/ASMs2D.h index 96f96b1c..410a0701 100644 --- a/src/ASM/ASMs2D.h +++ b/src/ASM/ASMs2D.h @@ -588,6 +588,9 @@ protected: void generateThreadGroups(size_t strip1, size_t strip2, bool silence, bool ignoreGlobalLM); + //! \brief Generate element groups from a partition. + virtual void generateThreadGroupsFromElms(const std::vector& elms); + public: //! \brief Auxilliary function for computation of basis function indices. static void scatterInd(int n1, int n2, int p1, int p2, diff --git a/src/ASM/ASMs2Dmx.C b/src/ASM/ASMs2Dmx.C index 4085921e..61ad7f36 100644 --- a/src/ASM/ASMs2Dmx.C +++ b/src/ASM/ASMs2Dmx.C @@ -528,13 +528,17 @@ bool ASMs2Dmx::integrate (Integrand& integrand, const int n1 = surf->numCoefs_u(); const int nel1 = n1 - p1 + 1; + ThreadGroups oneGroup; + if (glInt.threadSafe()) oneGroup.oneStripe(nel); + const ThreadGroups& groups = glInt.threadSafe() ? oneGroup : threadGroups; + // === Assembly loop over all elements in the patch ========================== bool ok=true; - for (size_t g=0;g dNxdu(m_basis.size()); std::vector d2Nxdu2(m_basis.size()); @@ -543,9 +547,9 @@ bool ASMs2Dmx::integrate (Integrand& integrand, Matrix Xnod, Jac; double param[3] = { 0.0, 0.0, 0.0 }; Vec4 X(param); - for (size_t i = 0; i < threadGroups[g][t].size() && ok; ++i) + for (size_t i = 0; i < groups[g][t].size() && ok; ++i) { - int iel = threadGroups[g][t][i]; + int iel = groups[g][t][i]; fe.iel = MLGE[iel]; if (fe.iel < 1) continue; // zero-area element @@ -737,6 +741,10 @@ bool ASMs2Dmx::integrate (Integrand& integrand, int lIndex, fe.iel = MLGE[iel-1]; if (fe.iel < 1) continue; // zero-area element + if (!myElms.empty() && !glInt.threadSafe() && + std::find(myElms.begin(), myElms.end(), iel-1) == myElms.end()) + continue; + // Skip elements that are not on current boundary edge bool skipMe = false; switch (edgeDir) @@ -866,6 +874,10 @@ bool ASMs2Dmx::integrate (Integrand& integrand, fe.iel = abs(MLGE[iel]); if (fe.iel < 1) continue; // zero-area element + if (!myElms.empty() && !glInt.threadSafe() && + std::find(myElms.begin(), myElms.end(), iel-1) == myElms.end()) + continue; + short int status = iChk.hasContribution(iel,i1,i2); if (!status) continue; // no interface contributions for this element diff --git a/src/ASM/ASMs3D.C b/src/ASM/ASMs3D.C index 449f2a59..4475553c 100644 --- a/src/ASM/ASMs3D.C +++ b/src/ASM/ASMs3D.C @@ -2706,6 +2706,10 @@ bool ASMs3D::integrateEdge (Integrand& integrand, int lEdge, fe.iel = MLGE[iel-1]; if (fe.iel < 1) continue; // zero-volume element + if (!myElms.empty() && + std::find(myElms.begin(), myElms.end(), fe.iel-1) == myElms.end()) + continue; + // Skip elements that are not on current boundary edge bool skipMe = false; switch (lEdge) @@ -3533,3 +3537,17 @@ void ASMs3D::getBoundaryElms (int lIndex, int, IntVec& elms) const elms.push_back(MLGE[i + j*N1m + (lIndex-5)*(N1m*N2m*(N3m-1))] - 1); } } + + +void ASMs3D::generateThreadGroupsFromElms(const std::vector& elms) +{ + myElms.clear(); + for (int elm : elms) + if (this->getElmIndex(elm+1) > 0) + myElms.push_back(this->getElmIndex(elm+1)-1); + + threadGroupsVol = threadGroupsVol.filter(myElms); + + for (auto& group : threadGroupsFace) + group.second = group.second.filter(myElms); +} diff --git a/src/ASM/ASMs3D.h b/src/ASM/ASMs3D.h index 75312eba..9d2ed726 100644 --- a/src/ASM/ASMs3D.h +++ b/src/ASM/ASMs3D.h @@ -653,6 +653,9 @@ protected: //! \param[in] silence If \e true, suppress threading group outprint virtual void generateThreadGroups(char lIndex, bool silence, bool); + //! \brief Generate element groups from a partition. + virtual void generateThreadGroupsFromElms(const std::vector& elms); + public: //! \brief Auxilliary function for computation of basis function indices. static void scatterInd(int n1, int n2, int n3, int p1, int p2, int p3, diff --git a/src/ASM/ASMs3Dmx.C b/src/ASM/ASMs3Dmx.C index b897ebcb..5f5b2587 100644 --- a/src/ASM/ASMs3Dmx.C +++ b/src/ASM/ASMs3Dmx.C @@ -483,12 +483,17 @@ bool ASMs3Dmx::integrate (Integrand& integrand, const int nel1 = n1 - p1 + 1; const int nel2 = n2 - p2 + 1; + ThreadGroups oneGroup; + if (glInt.threadSafe()) oneGroup.oneStripe(nel); + const ThreadGroups& groups = glInt.threadSafe() ? oneGroup : threadGroupsVol; + + // === Assembly loop over all elements in the patch ========================== - bool ok=true; - for (size_t g=0;g dNxdu(m_basis.size()); std::vector d2Nxdu2(m_basis.size()); @@ -497,9 +502,9 @@ bool ASMs3Dmx::integrate (Integrand& integrand, Matrix Xnod, Jac; double param[3]; Vec4 X(param); - for (size_t l = 0; l < threadGroupsVol[g][t].size() && ok; ++l) + for (size_t l = 0; l < groups[g][t].size() && ok; ++l) { - int iel = threadGroupsVol[g][t][l]; + int iel = groups[g][t][l]; fe.iel = MLGE[iel]; if (fe.iel < 1) continue; // zero-volume element diff --git a/src/ASM/DomainDecomposition.C b/src/ASM/DomainDecomposition.C index c01d5845..19b8b947 100644 --- a/src/ASM/DomainDecomposition.C +++ b/src/ASM/DomainDecomposition.C @@ -18,6 +18,7 @@ #include "ASMunstruct.h" #include "LinSolParams.h" #include "ProcessAdm.h" +#include "Profiler.h" #include "SAMpatch.h" #include "SIMbase.h" #include "Utilities.h" @@ -26,6 +27,14 @@ #include #include +#ifdef HAS_ZOLTAN +#define OLD_MPI HAVE_MPI +#undef HAVE_MPI +#include +#undef HAVE_MPI +#define HAVE_MPI OLD_MPI +#endif + DomainDecomposition:: OrientIterator::OrientIterator(const ASMbase* pch, @@ -170,6 +179,95 @@ OrientIterator::OrientIterator(const ASMbase* pch, int orient, int lIdx) } +#ifdef HAS_ZOLTAN +static int getNumElements(void* mesh, int* err) +{ + *err = ZOLTAN_OK; + IntMat& neigh = *static_cast(mesh); + return neigh.size(); +} + + +static void getElementList(void* mesh, int numGlobalEntries, + int, ZOLTAN_ID_PTR gids, + ZOLTAN_ID_PTR lids, int, + float*, int* err) +{ + IntMat& neigh = *static_cast(mesh); + std::iota(gids, gids+neigh.size(), 0); + std::iota(lids, lids+neigh.size(), 0); + *err = ZOLTAN_OK; +} + + +static void getNumEdges(void* mesh, int sizeGID, int sizeLID, + int numCells, ZOLTAN_ID_PTR globalID, + ZOLTAN_ID_PTR localID, int* numEdges, int* err) +{ + IntMat& neigh = *static_cast(mesh); + int* ne = numEdges; + for (const std::vector& n : neigh) + *ne++ = std::accumulate(n.begin(), n.end(), 0, + [](const int& a, const int& b) + { + return b != -1 ? a + 1 : a; + }); + + *err = ZOLTAN_OK; +} + + +static void getEdges(void* mesh, int sizeGID, int sizeLID, int numCells, + ZOLTAN_ID_PTR globalID, ZOLTAN_ID_PTR localID, + int* numEdges, ZOLTAN_ID_PTR nborGID, int* nborProc, + int wgtDim, float* egts, int* err) +{ + IntMat& neigh = *static_cast(mesh); + + memset(nborProc, 0, numCells*sizeof(int)); + for (const std::vector& elm : neigh) + for (int n : elm) + if (n != -1) + *nborGID++ = n; + + *err = ZOLTAN_OK; +} + + +static int getNullElements(void*, int* err) +{ + *err = ZOLTAN_OK; + return 0; +} + + +static void getNullList(void*, int, + int, ZOLTAN_ID_PTR, + ZOLTAN_ID_PTR, int, + float*, int* err) +{ + *err = ZOLTAN_OK; +} + + +static void getNumNullEdges(void*, int, int, int, + ZOLTAN_ID_PTR, ZOLTAN_ID_PTR, + int*, int* err) +{ + *err = ZOLTAN_OK; +} + + +static void getNullEdges(void*, int, int, int, + ZOLTAN_ID_PTR, ZOLTAN_ID_PTR, + int*, ZOLTAN_ID_PTR, int*, + int, float*, int* err) +{ + *err = ZOLTAN_OK; +} +#endif + + std::vector> DomainDecomposition::getSubdomains(int nx, int ny, int nz, int overlap, size_t block) const { @@ -811,6 +909,47 @@ bool DomainDecomposition::calcGlobalEqNumbers(const ProcessAdm& adm, } +bool DomainDecomposition::calcGlobalEqNumbersPart(const ProcessAdm& adm, + const SIMbase& sim) +{ +#ifdef HAVE_MPI + blocks[0].MLGEQ.clear(); + blocks[0].MLGEQ.resize(sim.getSAM()->getNoEquations(), -1); + + blocks[0].minEq = 1; + blocks[0].maxEq = 0; + + if (adm.getProcId() != 0) { + adm.receive(blocks[0].MLGEQ, adm.getProcId()-1); + adm.receive(blocks[0].minEq, adm.getProcId()-1); + blocks[0].maxEq = blocks[0].minEq; + blocks[0].minEq++; + } + + for (size_t i = 0; i < myElms.size(); ++i) { + IntVec meen; + sim.getSAM()->getElmEqns(meen, myElms[i]+1); + for (int eq : meen) + if (eq > 0 && blocks[0].MLGEQ[eq-1] == -1) + blocks[0].MLGEQ[eq-1] = ++blocks[0].maxEq; + } + + if (adm.getProcId() < adm.getNoProcs()-1) { + adm.send(blocks[0].MLGEQ, adm.getProcId()+1); + adm.send(blocks[0].maxEq, adm.getProcId()+1); + } + + MPI_Bcast(&blocks[0].MLGEQ[0], blocks[0].MLGEQ.size(), MPI_INT, + adm.getNoProcs()-1, *adm.getCommunicator()); + + for (size_t i = 0; i < blocks[0].MLGEQ.size(); ++i) + blocks[0].G2LEQ[blocks[0].MLGEQ[i]] = i+1; +#endif + + return true; +} + + int DomainDecomposition::getGlobalEq(int lEq, size_t idx) const { if (lEq < 1 || idx >= blocks.size()) @@ -933,37 +1072,44 @@ bool DomainDecomposition::setup(const ProcessAdm& adm, const SIMbase& sim) #endif #endif + if (ghostConnections.empty() && adm.getNoProcs() > 1 && myElms.empty()) + if (!this->graphPartition(adm, sim)) + return false; + sam = dynamic_cast(sim.getSAM()); int ok = 1; - - // Establish global node numbers - if (!calcGlobalNodeNumbers(adm, sim)) - ok = 0; - - // sanity check the established domain decomposition - if (getMinNode() > getMaxNode()) { - std::cerr << "**DomainDecomposition::setup ** Process " - << adm.getProcId() << " owns no nodes." << std::endl; - ok = 0; - } - #ifdef HAVE_MPI int lok = ok; - MPI_Allreduce(&lok, &ok, 1, MPI_INT, MPI_SUM, *adm.getCommunicator()); #endif + if (myElms.empty()) { + // Establish global node numbers + if (!calcGlobalNodeNumbers(adm, sim)) + ok = 0; - if (ok < adm.getNoProcs()) - return false; - - // sanity check all corners of the patches - if (!sanityCheckCorners(sim)) - return false; + // sanity check the established domain decomposition + if (getMinNode() > getMaxNode()) { + std::cerr << "**DomainDecomposition::setup ** Process " + << adm.getProcId() << " owns no nodes." << std::endl; + ok = 0; + } #ifdef HAVE_MPI - ok = 1; + MPI_Allreduce(&lok, &ok, 1, MPI_INT, MPI_SUM, *adm.getCommunicator()); #endif + if (ok < adm.getNoProcs()) + return false; + + // sanity check all corners of the patches + if (!sanityCheckCorners(sim)) + return false; + +#ifdef HAVE_MPI + ok = 1; +#endif + } + // Establish local equation mappings for each block. if (sim.getSolParams() && sim.getSolParams()->getNoBlocks() > 1) { IFEM::cout << "\tEstablishing local block equation numbers" << std::endl; @@ -1011,7 +1157,10 @@ bool DomainDecomposition::setup(const ProcessAdm& adm, const SIMbase& sim) } // Establish global equation numbers for all blocks. - if (!calcGlobalEqNumbers(adm, sim)) + if (!myElms.empty()) { + if (!calcGlobalEqNumbersPart(adm, sim)) + return false; + } else if (!calcGlobalEqNumbers(adm, sim)) #ifdef HAVE_MPI ok = 0; #else @@ -1072,3 +1221,91 @@ int DomainDecomposition::getPatchOwner(size_t p) const return it->second; } + + +bool DomainDecomposition::graphPartition(const ProcessAdm& adm, const SIMbase& sim) +{ + PROFILE("Mesh partition"); + myElms.clear(); +#ifdef HAS_ZOLTAN + static bool inited = false; + if (!inited) + { + float ver; + Zoltan_Initialize(0, nullptr, &ver); + inited = true; + } + struct Zoltan_Struct* zz = Zoltan_Create(*adm.getCommunicator()); + IntMat neigh; + if (adm.getProcId() == 0) { + neigh = sim.getElmConnectivities(); + Zoltan_Set_Num_Obj_Fn(zz, getNumElements, &neigh); + Zoltan_Set_Obj_List_Fn(zz, getElementList, &neigh); + Zoltan_Set_Num_Edges_Multi_Fn(zz, getNumEdges, &neigh); + Zoltan_Set_Edge_List_Multi_Fn(zz, getEdges, &neigh); + } else { + Zoltan_Set_Num_Obj_Fn(zz, getNullElements, nullptr); + Zoltan_Set_Obj_List_Fn(zz, getNullList, nullptr); + Zoltan_Set_Num_Edges_Multi_Fn(zz, getNumNullEdges, nullptr); + Zoltan_Set_Edge_List_Multi_Fn(zz, getNullEdges, nullptr); + } + + Zoltan_Set_Param(zz, "DEBUG_LEVEL", "0"); + Zoltan_Set_Param(zz, "LB_METHOD", "GRAPH"); + Zoltan_Set_Param(zz, "GRAPH_PACKAGE", "Scotch"); + Zoltan_Set_Param(zz, "LB_APPROACH", "PARTITION"); + Zoltan_Set_Param(zz, "NUM_GID_ENTRIES", "1"); + Zoltan_Set_Param(zz, "NUM_LID_ENTRIES", "1"); + Zoltan_Set_Param(zz, "RETURN_LISTS", "ALL"); + Zoltan_Set_Param(zz, "CHECK_GRAPH", "2"); + Zoltan_Set_Param(zz,"EDGE_WEIGHT_DIM","0"); + Zoltan_Set_Param(zz, "OBJ_WEIGHT_DIM", "0"); + Zoltan_Set_Param(zz, "PHG_EDGE_SIZE_THRESHOLD", ".35"); /* 0-remove all, 1-remove none */ + int changes, numGidEntries, numLidEntries, numImport, numExport; + ZOLTAN_ID_PTR importGlobalGids, importLocalGids, exportGlobalGids, exportLocalGids; + int* importProcs, *importToPart, *exportProcs, *exportToPart; + Zoltan_LB_Partition(zz, /* input (all remaining fields are output) */ + &changes, /* 1 if partitioning was changed, 0 otherwise */ + &numGidEntries, /* Number of integers used for a global ID */ + &numLidEntries, /* Number of integers used for a local ID */ + &numImport, /* Number of vertices to be sent to me */ + &importGlobalGids, /* Global IDs of vertices to be sent to me */ + &importLocalGids, /* Local IDs of vertices to be sent to me */ + &importProcs, /* Process rank for source of each incoming vertex */ + &importToPart, /* New partition for each incoming vertex */ + &numExport, /* Number of vertices I must send to other processes*/ + &exportGlobalGids, /* Global IDs of the vertices I must send */ + &exportLocalGids, /* Local IDs of the vertices I must send */ + &exportProcs, /* Process to which I send each of the vertices */ + &exportToPart); /* Partition to which each vertex will belong */ + + if (sim.getProcessAdm().getProcId() == 0) { + std::vector> toExp(adm.getNoProcs()); + std::vector offProc(sim.getNoElms(), false); + for (int i = 0; i < numExport; ++i) { + int gid = exportGlobalGids[i]; + offProc[gid] = true; + } + myElms.reserve(numExport); + for (size_t i = 0; i < sim.getNoElms(); ++i) + if (!offProc[i]) + myElms.push_back(i); + } + else { + myElms.resize(numImport); + std::copy(importGlobalGids, importGlobalGids+numImport, myElms.begin()); + } + Zoltan_LB_Free_Part(&importGlobalGids, &importLocalGids, &importProcs, &importToPart); + Zoltan_LB_Free_Part(&exportGlobalGids, &exportLocalGids, &exportProcs, &exportToPart); + Zoltan_Destroy(&zz); +#else + std::cerr << "*** DomainDecompositon::graphPartition: Compiled without Zoltan support. No partitioning available." << std::endl; + return false; +#endif + + if (!myElms.empty()) + IFEM::cout << "Graph partitioning: " << myElms.size() << " elements in partition." << std::endl; + + partitioned = !myElms.empty(); + return partitioned; +} diff --git a/src/ASM/DomainDecomposition.h b/src/ASM/DomainDecomposition.h index d28c77cd..f0442772 100644 --- a/src/ASM/DomainDecomposition.h +++ b/src/ASM/DomainDecomposition.h @@ -174,6 +174,12 @@ public: //! \brief Returns number of matrix blocks. size_t getNoBlocks() const { return blocks.size()-1; } + //! \brief Returns whether a graph based partition is used. + bool isPartitioned() const { return partitioned; } + + //! \brief Returns elements in partition. + const std::vector& getElms() const { return myElms; } + private: //! \brief Calculates a 1D partitioning with a given overlap. //! \param[in] nel1 Number of knot-spans in first parameter direction. @@ -237,11 +243,17 @@ private: //! \brief Calculate the global equation numbers for given finite element model. bool calcGlobalEqNumbers(const ProcessAdm& adm, const SIMbase& sim); + //! \brief Calculate the global equation numbers for a partitioned model. + bool calcGlobalEqNumbersPart(const ProcessAdm& adm, const SIMbase& sim); + //! \brief Sanity check model. //! \details Collects the corners of all patches in the model and make sure nodes //! with matching coordinates have the same global node ID. bool sanityCheckCorners(const SIMbase& sim); + //! \brief Setup domain decomposition based on graph partitioning. + bool graphPartition(const ProcessAdm& adm, const SIMbase& sim); + std::map patchOwner; //!< Process that owns a particular patch //! \brief Struct with information per matrix block. @@ -258,12 +270,14 @@ private: std::vector MLGN; //!< Process-local-to-global node numbers std::vector blocks; //!< Equation mappings for all matrix blocks. + std::vector myElms; //!< Elements in partition int minDof = 0; //!< First DOF we own int maxDof = 0; //!< Last DOF we own int minNode = 0; //!< First node we own int maxNode = 0; //!< Last node we own const SAMpatch* sam = nullptr; //!< The assembly handler the DD is constructed for. + bool partitioned = false; //!< \e true if graph based decomposition }; #endif diff --git a/src/ASM/LR/ASMu2D.C b/src/ASM/LR/ASMu2D.C index 9c99a0e0..bbaa29be 100644 --- a/src/ASM/LR/ASMu2D.C +++ b/src/ASM/LR/ASMu2D.C @@ -1574,6 +1574,12 @@ bool ASMu2D::integrate (Integrand& integrand, int lIndex, int iel = 0; for (const LR::Element* el : lrspline->getAllElements()) { + if (!myElms.empty() && !glInt.threadSafe() && + std::find(myElms.begin(), myElms.end(), iel) == myElms.end()) { + ++iel; + continue; + } + fe.iel = MLGE[iel++]; #ifdef SP_DEBUG if (dbgElm < 0 && iel != -dbgElm) @@ -2705,3 +2711,14 @@ void ASMu2D::getBoundaryElms (int lIndex, int orient, IntVec& elms) const for (const LR::Element* elem : elements) elms.push_back(MLGE[elem->getId()]-1); } + + +void ASMu2D::generateThreadGroupsFromElms(const std::vector& elms) +{ + myElms.clear(); + for (int elm : elms) + if (this->getElmIndex(elm+1) > 0) + myElms.push_back(this->getElmIndex(elm+1)-1); + + threadGroups = threadGroups.filter(myElms); +} diff --git a/src/ASM/LR/ASMu2D.h b/src/ASM/LR/ASMu2D.h index 2c0c0ec6..52d8d771 100644 --- a/src/ASM/LR/ASMu2D.h +++ b/src/ASM/LR/ASMu2D.h @@ -591,6 +591,9 @@ protected: void generateThreadGroups(const Integrand& integrand, bool silence, bool ignoreGlobalLM); + //! \brief Generate element groups from a partition. + virtual void generateThreadGroupsFromElms(const std::vector& elms); + //! \brief Remap element wise errors to basis functions. //! \param errors The remapped errors //! \param[in] origErr The element wise errors on the geometry mesh diff --git a/src/ASM/LR/ASMu2Dmx.C b/src/ASM/LR/ASMu2Dmx.C index 21c2e268..097a6f9e 100644 --- a/src/ASM/LR/ASMu2Dmx.C +++ b/src/ASM/LR/ASMu2Dmx.C @@ -301,16 +301,21 @@ bool ASMu2Dmx::integrate (Integrand& integrand, if (!xg || !wg) return false; bool use2ndDer = integrand.getIntegrandType() & Integrand::SECOND_DERIVATIVES; + ThreadGroups oneGroup; + if (glInt.threadSafe()) oneGroup.oneGroup(nel); + const IntMat& groups = glInt.threadSafe() ? oneGroup[0] : threadGroups[0]; + + // === Assembly loop over all elements in the patch ========================== bool ok = true; - for (size_t t = 0; t < threadGroups[0].size() && ok; ++t) + for (size_t t = 0; t < groups.size() && ok; ++t) { //#pragma omp parallel for schedule(static) - for (size_t e = 0; e < threadGroups[0][t].size(); ++e) + for (size_t e = 0; e < groups[t].size(); ++e) { if (!ok) continue; - int iel = threadGroups[0][t][e] + 1; + int iel = groups[t][e] + 1; auto el1 = threadBasis->elementBegin()+iel-1; double uh = ((*el1)->umin()+(*el1)->umax())/2.0; double vh = ((*el1)->vmin()+(*el1)->vmax())/2.0; @@ -524,6 +529,12 @@ bool ASMu2Dmx::integrate (Integrand& integrand, int lIndex, } if (skipMe) continue; + if (!myElms.empty() && !glInt.threadSafe() && + std::find(myElms.begin(), myElms.end(), iel-1) == myElms.end()) { + ++iel; + continue; + } + double uh = ((*el1)->umin()+(*el1)->umax())/2.0; double vh = ((*el1)->vmin()+(*el1)->vmax())/2.0; std::vector els; @@ -644,6 +655,12 @@ bool ASMu2Dmx::integrate (Integrand& integrand, for (int iel = 1; el1 != m_basis[0]->elementEnd(); ++el1, ++iel) { short int status = iChk.hasContribution(iel); if (!status) continue; // no interface contributions for this element + + if (!myElms.empty() && !glInt.threadSafe() && + std::find(myElms.begin(), myElms.end(), iel-1) == myElms.end()) { + ++iel, ++el1; + continue; + } // first push the hosting elements std::vector els(1,iel); std::vector elem_sizes(1,(*el1)->nBasisFunctions()); diff --git a/src/ASM/LR/ASMu3D.C b/src/ASM/LR/ASMu3D.C index d34c2a04..a91d8531 100644 --- a/src/ASM/LR/ASMu3D.C +++ b/src/ASM/LR/ASMu3D.C @@ -1209,6 +1209,9 @@ bool ASMu3D::integrate (Integrand& integrand, int lIndex, if (dbgElm < 0 && iEl+1 != -dbgElm) continue; // Skipping all elements, except for -dbgElm #endif + if (!myElms.empty() && !glInt.threadSafe() && + std::find(myElms.begin(), myElms.end(), iEl) == myElms.end()) + continue; FiniteElement fe; fe.iel = MLGE[iEl]; @@ -2324,3 +2327,14 @@ void ASMu3D::getBoundaryElms (int lIndex, int orient, IntVec& elms) const for (const LR::Element* elem : elements) elms.push_back(MLGE[elem->getId()]-1); } + + +void ASMu3D::generateThreadGroupsFromElms(const std::vector& elms) +{ + myElms.clear(); + for (int elm : elms) + if (this->getElmIndex(elm+1) > 0) + myElms.push_back(this->getElmIndex(elm+1)-1); + + threadGroups = threadGroups.filter(myElms); +} diff --git a/src/ASM/LR/ASMu3D.h b/src/ASM/LR/ASMu3D.h index 8735f1fa..58129f03 100644 --- a/src/ASM/LR/ASMu3D.h +++ b/src/ASM/LR/ASMu3D.h @@ -553,6 +553,9 @@ protected: void generateThreadGroups(const Integrand& integrand, bool silence, bool ignoreGlobalLM); + //! \brief Generate element groups from a partition. + virtual void generateThreadGroupsFromElms(const std::vector& elms); + //! \brief Remap element wise errors to basis functions. //! \param errors The remapped errors //! \param[in] origErr The element wise errors on the geometry mesh @@ -587,6 +590,7 @@ protected: Matrices myBezierExtract; //!< Bezier extraction matrices ThreadGroups threadGroups; //!< Element groups for multi-threaded assembly + IntVec myElms; //!< Elements on patch - used with partitioning mutable double vMin; //!< Minimum element volume for adaptive refinement }; diff --git a/src/ASM/LR/ASMu3Dmx.C b/src/ASM/LR/ASMu3Dmx.C index f30a1269..baeaf01d 100644 --- a/src/ASM/LR/ASMu3Dmx.C +++ b/src/ASM/LR/ASMu3Dmx.C @@ -360,205 +360,214 @@ bool ASMu3Dmx::integrate (Integrand& integrand, else if (nRed < 0) nRed = nGauss; // The integrand needs to know nGauss + ThreadGroups oneGroup; + if (glInt.threadSafe()) oneGroup.oneGroup(nel); + const IntMat& group = glInt.threadSafe() ? oneGroup[0] : threadGroups[0]; + // === Assembly loop over all elements in the patch ========================== - bool ok=true; - for(LR::Element *el : this->getBasis(geoBasis)->getAllElements()) { - double uh = (el->umin()+el->umax())/2.0; - double vh = (el->vmin()+el->vmax())/2.0; - double wh = (el->wmin()+el->wmax())/2.0; - std::vector els; - std::vector elem_sizes; - for (size_t i=0; i < m_basis.size(); ++i) { - els.push_back(m_basis[i]->getElementContaining(uh, vh, wh)+1); - elem_sizes.push_back((*(m_basis[i]->elementBegin()+els.back()-1))->nBasisFunctions()); - } - int iEl = el->getId(); - MxFiniteElement fe(elem_sizes); - fe.iel = MLGE[iEl]; - - std::vector dNxdu(m_basis.size()); - Matrix Xnod, Jac; - std::vector d2Nxdu2(m_basis.size()); - Matrix3D Hess; - double dXidu[3]; - double param[3] = { 0.0, 0.0, 0.0 }; - Vec4 X(param); - // Get element volume in the parameter space - double du = el->umax() - el->umin(); - double dv = el->vmax() - el->vmin(); - double dw = el->wmax() - el->wmin(); - double vol = el->volume(); - if (vol < 0.0) + bool ok = true; + for (size_t t = 0; t < group.size() && ok; t++) +//#pragma omp parallel for schedule(static) + for (size_t e = 0; e < group[t].size(); e++) { - ok = false; // topology error (probably logic error) - break; - } + int iel = group[t][e] + 1; + const LR::Element* el = lrspline->getElement(iel-1); + double uh = (el->umin()+el->umax())/2.0; + double vh = (el->vmin()+el->vmax())/2.0; + double wh = (el->wmin()+el->wmax())/2.0; + std::vector els; + std::vector elem_sizes; + for (size_t i=0; i < m_basis.size(); ++i) { + els.push_back(m_basis[i]->getElementContaining(uh, vh, wh)+1); + elem_sizes.push_back((*(m_basis[i]->elementBegin()+els.back()-1))->nBasisFunctions()); + } + int iEl = el->getId(); + MxFiniteElement fe(elem_sizes); + fe.iel = MLGE[iEl]; - // Set up control point (nodal) coordinates for current element - if (!this->getElementCoordinates(Xnod,iEl+1)) - { - ok = false; - break; - } - - // Compute parameter values of the Gauss points over the whole element - std::array gpar, redpar; - for (int d = 0; d < 3; d++) - { - this->getGaussPointParameters(gpar[d],d,nGauss,iEl+1,xg); - if (xr) - this->getGaussPointParameters(redpar[d],d,nRed,iEl+1,xr); - } - - - if (integrand.getIntegrandType() & Integrand::ELEMENT_CORNERS) - fe.h = this->getElementCorners(iEl+1, fe.XC); - - if (integrand.getIntegrandType() & Integrand::G_MATRIX) - { - // Element size in parametric space - dXidu[0] = el->getParmin(0); - dXidu[1] = el->getParmin(1); - dXidu[2] = el->getParmin(2); - } - else if (integrand.getIntegrandType() & Integrand::AVERAGE) - { - // --- Compute average value of basis functions over the element ----- - - fe.Navg.resize(elem_sizes[0],true); - double vol = 0.0; - for (int k = 0; k < nGauss; k++) - for (int j = 0; j < nGauss; j++) - for (int i = 0; i < nGauss; i++) - { - // Fetch basis function derivatives at current integration point - for (size_t b = 1; b <= m_basis.size(); ++b) - this->evaluateBasis(iEl, fe, dNxdu[b-1], b); - - // Compute Jacobian determinant of coordinate mapping - // and multiply by weight of current integration point - double detJac = utl::Jacobian(Jac,fe.grad(geoBasis), - Xnod,dNxdu[geoBasis-1],false); - double weight = 0.125*vol*wg[i]*wg[j]*wg[k]; - - // Numerical quadrature - fe.Navg.add(fe.N,detJac*weight); - vol += detJac*weight; + std::vector dNxdu(m_basis.size()); + Matrix Xnod, Jac; + std::vector d2Nxdu2(m_basis.size()); + Matrix3D Hess; + double dXidu[3]; + double param[3] = { 0.0, 0.0, 0.0 }; + Vec4 X(param); + // Get element volume in the parameter space + double du = el->umax() - el->umin(); + double dv = el->vmax() - el->vmin(); + double dw = el->wmax() - el->wmin(); + double vol = el->volume(); + if (vol < 0.0) + { + ok = false; // topology error (probably logic error) + break; } - // Divide by element volume - fe.Navg /= vol; - } + // Set up control point (nodal) coordinates for current element + if (!this->getElementCoordinates(Xnod,iEl+1)) + { + ok = false; + break; + } - else if (integrand.getIntegrandType() & Integrand::ELEMENT_CENTER) - { - // Compute the element center - Go::Point X0; - double u0 = 0.5*(el->getParmin(0) + el->getParmax(0)); - double v0 = 0.5*(el->getParmin(1) + el->getParmax(1)); - double w0 = 0.5*(el->getParmin(2) + el->getParmax(2)); - this->getBasis(geoBasis)->point(X0,u0,v0,w0); - X = SplineUtils::toVec3(X0); - } + // Compute parameter values of the Gauss points over the whole element + std::array gpar, redpar; + for (int d = 0; d < 3; d++) + { + this->getGaussPointParameters(gpar[d],d,nGauss,iEl+1,xg); + if (xr) + this->getGaussPointParameters(redpar[d],d,nRed,iEl+1,xr); + } + + + if (integrand.getIntegrandType() & Integrand::ELEMENT_CORNERS) + fe.h = this->getElementCorners(iEl+1, fe.XC); + + if (integrand.getIntegrandType() & Integrand::G_MATRIX) + { + // Element size in parametric space + dXidu[0] = el->getParmin(0); + dXidu[1] = el->getParmin(1); + dXidu[2] = el->getParmin(2); + } + else if (integrand.getIntegrandType() & Integrand::AVERAGE) + { + // --- Compute average value of basis functions over the element ----- + + fe.Navg.resize(elem_sizes[0],true); + double vol = 0.0; + for (int k = 0; k < nGauss; k++) + for (int j = 0; j < nGauss; j++) + for (int i = 0; i < nGauss; i++) + { + // Fetch basis function derivatives at current integration point + for (size_t b = 1; b <= m_basis.size(); ++b) + this->evaluateBasis(iEl, fe, dNxdu[b-1], b); + + // Compute Jacobian determinant of coordinate mapping + // and multiply by weight of current integration point + double detJac = utl::Jacobian(Jac,fe.grad(geoBasis), + Xnod,dNxdu[geoBasis-1],false); + double weight = 0.125*vol*wg[i]*wg[j]*wg[k]; + + // Numerical quadrature + fe.Navg.add(fe.N,detJac*weight); + vol += detJac*weight; + } + + // Divide by element volume + fe.Navg /= vol; + } + + else if (integrand.getIntegrandType() & Integrand::ELEMENT_CENTER) + { + // Compute the element center + Go::Point X0; + double u0 = 0.5*(el->getParmin(0) + el->getParmax(0)); + double v0 = 0.5*(el->getParmin(1) + el->getParmax(1)); + double w0 = 0.5*(el->getParmin(2) + el->getParmax(2)); + this->getBasis(geoBasis)->point(X0,u0,v0,w0); + X = SplineUtils::toVec3(X0); + } + + // Initialize element quantities + LocalIntegral* A = integrand.getLocalIntegral(elem_sizes,fe.iel); + if (!integrand.initElement(MNPC[iEl],elem_sizes,nb,*A)) + { + A->destruct(); + ok = false; + break; + } + + if (xr) + { + std::cerr << "Haven't really figured out what this part does yet\n"; + exit(42142); + } + + // --- Integration loop over all Gauss points in each direction -------- + + fe.iGP = iEl*nGauss*nGauss*nGauss; // Global integration point counter + + std::vector B(m_basis.size()); + size_t ig = 1; + for (int k = 0; k < nGauss; k++) + for (int j = 0; j < nGauss; j++) + for (int i = 0; i < nGauss; i++, fe.iGP++, ig++) + { + // Local element coordinates of current integration point + fe.xi = xg[i]; + fe.eta = xg[j]; + fe.zeta = xg[k]; + + // Parameter values of current integration point + fe.u = param[0] = gpar[0][i]; + fe.v = param[1] = gpar[1][j]; + fe.w = param[2] = gpar[2][k]; + + // Extract bezier basis functions + for (size_t b = 0; b < m_basis.size(); ++b) { + Matrix B(BN[b].rows(), 4); + B.fillColumn(1, BN[b].getColumn(ig)); + B.fillColumn(2, BdNdu[b].getColumn(ig)*2.0/du); + B.fillColumn(3, BdNdv[b].getColumn(ig)*2.0/dv); + B.fillColumn(4, BdNdw[b].getColumn(ig)*2.0/dw); + + // Fetch basis function derivatives at current integration point + if (integrand.getIntegrandType() & Integrand::SECOND_DERIVATIVES) + this->evaluateBasis(els[b]-1, fe, dNxdu[b], d2Nxdu2[b], b+1); + else + this->evaluateBasis(fe, dNxdu[b], bezierExtractmx[b][els[b]-1], B, b+1); + } + + // Compute Jacobian inverse of coordinate mapping and derivatives + fe.detJxW = utl::Jacobian(Jac,fe.grad(geoBasis),Xnod,dNxdu[geoBasis-1]); + if (fe.detJxW == 0.0) continue; // skip singular points + for (size_t b = 0; b < m_basis.size(); ++b) + if (b != (size_t)geoBasis-1) + fe.grad(b+1).multiply(dNxdu[b],Jac); + + // Compute Hessian of coordinate mapping and 2nd order derivatives + if (integrand.getIntegrandType() & Integrand::SECOND_DERIVATIVES) { + if (!utl::Hessian(Hess,fe.hess(geoBasis),Jac,Xnod, + d2Nxdu2[geoBasis-1],dNxdu[geoBasis-1])) + return false; + + for (size_t b = 0; b < m_basis.size() && ok; ++b) + if ((int)b != geoBasis) + if (!utl::Hessian(Hess,fe.hess(b+1),Jac,Xnod, + d2Nxdu2[b],fe.grad(b+1),false)) + return false; + } + + // Compute G-matrix + if (integrand.getIntegrandType() & Integrand::G_MATRIX) + utl::getGmat(Jac,dXidu,fe.G); + + // Cartesian coordinates of current integration point + X.assign(Xnod * fe.basis(geoBasis)); + X.t = time.t; + + // Evaluate the integrand and accumulate element contributions + fe.detJxW *= 0.125*vol*wg[i]*wg[j]*wg[k]; + if (!integrand.evalIntMx(*A,fe,time,X)) + ok = false; + + } // end gauss integrand + + // Finalize the element quantities + if (ok && !integrand.finalizeElement(*A,time,0)) + ok = false; + + // Assembly of global system integral + if (ok && !glInt.assemble(A->ref(),fe.iel)) + ok = false; - // Initialize element quantities - LocalIntegral* A = integrand.getLocalIntegral(elem_sizes,fe.iel); - if (!integrand.initElement(MNPC[iEl],elem_sizes,nb,*A)) - { A->destruct(); - ok = false; - break; } - if (xr) - { - std::cerr << "Haven't really figured out what this part does yet\n"; - exit(42142); - } - - // --- Integration loop over all Gauss points in each direction -------- - - fe.iGP = iEl*nGauss*nGauss*nGauss; // Global integration point counter - - std::vector B(m_basis.size()); - size_t ig = 1; - for (int k = 0; k < nGauss; k++) - for (int j = 0; j < nGauss; j++) - for (int i = 0; i < nGauss; i++, fe.iGP++, ig++) - { - // Local element coordinates of current integration point - fe.xi = xg[i]; - fe.eta = xg[j]; - fe.zeta = xg[k]; - - // Parameter values of current integration point - fe.u = param[0] = gpar[0][i]; - fe.v = param[1] = gpar[1][j]; - fe.w = param[2] = gpar[2][k]; - - // Extract bezier basis functions - for (size_t b = 0; b < m_basis.size(); ++b) { - Matrix B(BN[b].rows(), 4); - B.fillColumn(1, BN[b].getColumn(ig)); - B.fillColumn(2, BdNdu[b].getColumn(ig)*2.0/du); - B.fillColumn(3, BdNdv[b].getColumn(ig)*2.0/dv); - B.fillColumn(4, BdNdw[b].getColumn(ig)*2.0/dw); - - // Fetch basis function derivatives at current integration point - if (integrand.getIntegrandType() & Integrand::SECOND_DERIVATIVES) - this->evaluateBasis(els[b]-1, fe, dNxdu[b], d2Nxdu2[b], b+1); - else - this->evaluateBasis(fe, dNxdu[b], bezierExtractmx[b][els[b]-1], B, b+1); - } - - // Compute Jacobian inverse of coordinate mapping and derivatives - fe.detJxW = utl::Jacobian(Jac,fe.grad(geoBasis),Xnod,dNxdu[geoBasis-1]); - if (fe.detJxW == 0.0) continue; // skip singular points - for (size_t b = 0; b < m_basis.size(); ++b) - if (b != (size_t)geoBasis-1) - fe.grad(b+1).multiply(dNxdu[b],Jac); - - // Compute Hessian of coordinate mapping and 2nd order derivatives - if (integrand.getIntegrandType() & Integrand::SECOND_DERIVATIVES) { - if (!utl::Hessian(Hess,fe.hess(geoBasis),Jac,Xnod, - d2Nxdu2[geoBasis-1],dNxdu[geoBasis-1])) - return false; - - for (size_t b = 0; b < m_basis.size() && ok; ++b) - if ((int)b != geoBasis) - if (!utl::Hessian(Hess,fe.hess(b+1),Jac,Xnod, - d2Nxdu2[b],fe.grad(b+1),false)) - return false; - } - - // Compute G-matrix - if (integrand.getIntegrandType() & Integrand::G_MATRIX) - utl::getGmat(Jac,dXidu,fe.G); - - // Cartesian coordinates of current integration point - X.assign(Xnod * fe.basis(geoBasis)); - X.t = time.t; - - // Evaluate the integrand and accumulate element contributions - fe.detJxW *= 0.125*vol*wg[i]*wg[j]*wg[k]; - if (!integrand.evalIntMx(*A,fe,time,X)) - ok = false; - - } // end gauss integrand - - // Finalize the element quantities - if (ok && !integrand.finalizeElement(*A,time,0)) - ok = false; - - // Assembly of global system integral - if (ok && !glInt.assemble(A->ref(),fe.iel)) - ok = false; - - A->destruct(); - } - return ok; } diff --git a/src/ASM/SAMpatchPETSc.C b/src/ASM/SAMpatchPETSc.C index ebd2110c..f08e39bb 100644 --- a/src/ASM/SAMpatchPETSc.C +++ b/src/ASM/SAMpatchPETSc.C @@ -54,6 +54,9 @@ Real SAMpatchPETSc::dot (const Vector& x, const Vector& y, char dofType) const { #ifdef HAVE_MPI if (adm.isParallel()) { + if (adm.dd.isPartitioned()) + return this->SAMpatch::dot(x,y,dofType); + DofIS& dis = this->getIS(dofType); Vec lx, ly; @@ -94,7 +97,7 @@ Real SAMpatchPETSc::dot (const Vector& x, const Vector& y, char dofType) const Real SAMpatchPETSc::normL2 (const Vector& x, char dofType) const { #ifdef HAVE_MPI - if (adm.isParallel()) { + if (adm.isParallel() && !adm.dd.isPartitioned()) { DofIS& dis = this->getIS(dofType); Vec lx; @@ -130,7 +133,7 @@ Real SAMpatchPETSc::normInf (const Vector& x, size_t& comp, char dofType) const { Real max = this->SAM::normInf(x,comp,dofType); #ifdef HAVE_MPI - if (adm.isParallel()) + if (adm.isParallel() && !adm.dd.isPartitioned()) max = adm.allReduce(max, MPI_MAX); // TODO: comp (node index of the max value) is not necessarily correct here #endif @@ -187,23 +190,33 @@ bool SAMpatchPETSc::expandSolution(const SystemVector& solVec, #ifdef HAVE_MPI if (adm.isParallel()) { - if (!glob2LocEq) { + if (!glob2LocEq && !adm.dd.isPartitioned()) { IntVec mlgeq(adm.dd.getMLGEQ()); for (int& ieq : mlgeq) --ieq; ISCreateGeneral(*adm.getCommunicator(),adm.dd.getMLGEQ().size(), mlgeq.data(), PETSC_COPY_VALUES, &glob2LocEq); } - Vec solution; - VecCreateSeq(PETSC_COMM_SELF, Bptr->dim(), &solution); - VecScatter ctx; - VecScatterCreate(Bptr->getVector(), glob2LocEq, solution, nullptr, &ctx); + VecScatter ctx; + Vec solution; + if (adm.dd.isPartitioned()) + VecScatterCreateToAll(Bptr->getVector(), &ctx, &solution); + else { + VecCreateSeq(PETSC_COMM_SELF, Bptr->dim(), &solution); + VecScatterCreate(Bptr->getVector(), glob2LocEq, solution, nullptr, &ctx); + } + VecScatterBegin(ctx, Bptr->getVector(), solution, INSERT_VALUES, SCATTER_FORWARD); VecScatterEnd(ctx, Bptr->getVector(),solution,INSERT_VALUES,SCATTER_FORWARD); VecScatterDestroy(&ctx); PetscScalar* data; VecGetArray(solution, &data); - std::copy(data, data + Bptr->dim(), Bptr->getPtr()); + if (adm.dd.isPartitioned()) { + for (const auto& it : adm.dd.getG2LEQ(0)) + (*Bptr)(it.second) = data[it.first-1]; + } else + std::copy(data, data + Bptr->dim(), Bptr->getPtr()); + VecDestroy(&solution); } else #endif diff --git a/src/IFEM.C b/src/IFEM.C index 0ab0f78d..a1cfe3f6 100644 --- a/src/IFEM.C +++ b/src/IFEM.C @@ -93,7 +93,7 @@ int IFEM::Init (int arg_c, char** arg_v, const char* title) #endif std::cout <<"\n MPI support: "; -#if HAVE_MPI +#ifdef HAVE_MPI std::cout <<"enabled"; #else std::cout <<"disabled"; diff --git a/src/LinAlg/PETScMatrix.C b/src/LinAlg/PETScMatrix.C index 23c80100..a676be22 100644 --- a/src/LinAlg/PETScMatrix.C +++ b/src/LinAlg/PETScMatrix.C @@ -91,8 +91,8 @@ void PETScVector::redim(size_t n) bool PETScVector::beginAssembly() { - for (size_t i = 0; i < size(); ++i) - VecSetValue(x , adm.dd.getGlobalEq(i+1)-1, (*this)[i], ADD_VALUES); + for (size_t i = 0; i < this->size(); ++i) + VecSetValue(x, adm.dd.getGlobalEq(i+1)-1, (*this)[i], ADD_VALUES); VecAssemblyBegin(x); return true; @@ -180,31 +180,32 @@ PETScMatrix::~PETScMatrix () void PETScMatrix::initAssembly (const SAM& sam, bool delayLocking) { - SparseMatrix::initAssembly(sam, delayLocking); - SparseMatrix::preAssemble(sam, delayLocking); + if (!adm.dd.isPartitioned()) { + SparseMatrix::initAssembly(sam, delayLocking); + SparseMatrix::preAssemble(sam, delayLocking); + } else + this->resize(sam.neq,sam.neq); const SAMpatchPETSc* samp = dynamic_cast(&sam); if (!samp) return; // Get number of local equations in linear system - const PetscInt neq = adm.dd.getMaxEq()- adm.dd.getMinEq() + 1; - + PetscInt neq; + neq = adm.dd.getMaxEq() - adm.dd.getMinEq() + 1; // Set correct number of rows and columns for matrix. - MatSetSizes(pA,neq,neq,PETSC_DECIDE,PETSC_DECIDE); + MatSetSizes(pA,neq,neq,PETSC_DETERMINE,PETSC_DETERMINE); // Allocate sparsity pattern std::vector> dofc; - sam.getDofCouplings(dofc); + if (!adm.dd.isPartitioned()) + sam.getDofCouplings(dofc); if (matvec.empty()) { - // Set correct number of rows and columns for matrix. - MatSetSizes(pA,neq,neq,PETSC_DECIDE,PETSC_DECIDE); - MatSetFromOptions(pA); // Allocate sparsity pattern - if (adm.isParallel()) { + if (adm.isParallel() && !adm.dd.isPartitioned()) { int ifirst = adm.dd.getMinEq(); int ilast = adm.dd.getMaxEq(); PetscIntVec d_nnz(neq, 0); @@ -235,6 +236,34 @@ void PETScMatrix::initAssembly (const SAM& sam, bool delayLocking) MatMPIAIJSetPreallocation(pA,PETSC_DEFAULT,d_nnz.data(), PETSC_DEFAULT,o_nnz.data()); + } else if (adm.dd.isPartitioned()) { + // Setup sparsity pattern for global matrix + SparseMatrix* lA = new SparseMatrix(neq, sam.getNoEquations()); + for (int elm : adm.dd.getElms()) { + IntVec meen; + sam.getElmEqns(meen,elm+1); + for (int i : meen) + if (i > 0 && i >= adm.dd.getMinEq(0) && i <= adm.dd.getMaxEq(0)) + for (int j : meen) + if (j > 0) + (*lA)(i-adm.dd.getMinEq(0)+1,adm.dd.getGlobalEq(j)) = 0.0; + } + IntVec iA, jA; + SparseMatrix::calcCSR(iA, jA, neq, lA->getValues()); + delete lA; + MatMPIAIJSetPreallocationCSR(pA, iA.data(), jA.data(), nullptr); + + // Setup sparsity pattern for local matrix + for (int elm : adm.dd.getElms()) { + IntVec meen; + sam.getElmEqns(meen,elm+1); + for (int i : meen) + if (i > 0) + for (int j : meen) + if (j > 0) + (*this)(i,j) = 0.0; + } + this->optimiseSLU(); } else { PetscIntVec Nnz; for (const auto& it : dofc) diff --git a/src/SIM/SIMbase.C b/src/SIM/SIMbase.C index 0e47e51c..097b3e31 100644 --- a/src/SIM/SIMbase.C +++ b/src/SIM/SIMbase.C @@ -107,6 +107,7 @@ void SIMbase::clearProperties () myProps.clear(); myInts.clear(); mixedMADOFs.clear(); + adm.dd.setElms({},""); } @@ -173,13 +174,6 @@ bool SIMbase::preprocess (const IntVec& ignored, bool fixDup) return false; } - if (nProc > 1 && myPatches.empty() && adm.isParallel()) - { - std::cerr <<" *** SIMbase::preprocess: No partitioning information for " - << nProc <<" processors found."<< std::endl; - return false; - } - PROFILE1("Model preprocessing"); static int substep = 10; @@ -367,12 +361,23 @@ bool SIMbase::preprocess (const IntVec& ignored, bool fixDup) if (!static_cast(mySam)->init(myModel,ngnod,dofTypes)) return false; + if (nProc > 1 && myPatches.empty() && adm.isParallel()) + { + IFEM::cout <<" *** SIMbase::preprocess: No partitioning information for " + << nProc <<" processors found. Using graph partitioning\n"; + } + if (!adm.dd.setup(adm,*this)) { std::cerr <<"\n *** SIMbase::preprocess(): Failed to establish " <<" domain decomposition data."<< std::endl; return false; } + else if (adm.dd.isPartitioned()) // reuse thread groups for element list + { + for (ASMbase* pch : this->getFEModel()) + pch->generateThreadGroupsFromElms(adm.dd.getElms()); + } if (!myProblem) { @@ -1488,8 +1493,9 @@ bool SIMbase::solutionNorms (const TimeDomain& time, delete norm; - for (k = 0; k < gNorm.size(); k++) - adm.allReduceAsSum(gNorm[k]); + if (!adm.dd.isPartitioned()) + for (k = 0; k < gNorm.size(); k++) + adm.allReduceAsSum(gNorm[k]); if (ok) ok = this->postProcessNorms(gNorm, eNorm); diff --git a/src/SIM/SIMdummy.h b/src/SIM/SIMdummy.h index 31faba1f..76c0769c 100644 --- a/src/SIM/SIMdummy.h +++ b/src/SIM/SIMdummy.h @@ -41,7 +41,9 @@ public: virtual unsigned short int getNoParamDim() const { return 0; } //! \brief Creates the computational FEM model from the spline patches. virtual bool createFEMmodel(char) { return false; } - + //! \brief Element-element connectivities. + virtual std::vector> getElmConnectivities() const + { return std::vector>(); } protected: //! \brief Preprocesses a user-defined Dirichlet boundary property. virtual bool addConstraint(int,int,int,int,int,int&,char) diff --git a/src/SIM/SIMoutput.C b/src/SIM/SIMoutput.C index 328f1188..1e57e627 100644 --- a/src/SIM/SIMoutput.C +++ b/src/SIM/SIMoutput.C @@ -286,18 +286,29 @@ bool SIMoutput::writeGlvG (int& nBlock, const char* inpFile, bool doClear) // Open a new VTF-file char* vtfName = new char[strlen(inpFile)+10]; strtok(strcpy(vtfName,inpFile),"."); - if (nProc > 1) - sprintf(vtfName+strlen(vtfName),"_p%04d%s",myPid,ext); - else - strcat(vtfName,ext); + if (adm.dd.isPartitioned()) { + if (adm.getProcId() == 0) { + strcat(vtfName,ext); + IFEM::cout <<"\nWriting VTF-file "<< vtfName << std::endl; + myVtf = new VTF(vtfName,opt.format); + } + } else { + if (nProc > 1) + sprintf(vtfName+strlen(vtfName),"_p%04d%s",myPid,ext); + else + strcat(vtfName,ext); - IFEM::cout <<"\nWriting VTF-file "<< vtfName << std::endl; - myVtf = new VTF(vtfName,opt.format); + IFEM::cout <<"\nWriting VTF-file "<< vtfName << std::endl; + myVtf = new VTF(vtfName,opt.format); + } delete[] vtfName; } else if (doClear) myVtf->clearGeometryBlocks(); + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + ElementBlock* lvb; char pname[32]; @@ -337,6 +348,9 @@ bool SIMoutput::writeGlvG (int& nBlock, const char* inpFile, bool doClear) bool SIMoutput::writeGlvBC (int& nBlock, int iStep) const { + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + if (!myVtf) return false; Matrix field; @@ -403,6 +417,9 @@ bool SIMoutput::writeGlvBC (int& nBlock, int iStep) const bool SIMoutput::writeGlvT (int iStep, int& geoBlk, int& nBlock) const { + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + if (myProblem->hasTractionValues()) { if (msgLevel > 1) @@ -417,6 +434,9 @@ bool SIMoutput::writeGlvT (int iStep, int& geoBlk, int& nBlock) const bool SIMoutput::writeGlvV (const Vector& vec, const char* fieldName, int iStep, int& nBlock, int idBlock, int ncmp) const { + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + if (vec.empty()) return true; else if (!myVtf) @@ -459,6 +479,9 @@ bool SIMoutput::writeGlvV (const Vector& vec, const char* fieldName, bool SIMoutput::writeGlvS (const Vector& scl, const char* fieldName, int iStep, int& nBlock, int idBlock) const { + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + if (scl.empty()) return true; else if (!myVtf) @@ -496,6 +519,9 @@ bool SIMoutput::writeGlvS (const Vector& psol, int iStep, int& nBlock, double time, const char* pvecName, int idBlock, int psolComps) { + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + idBlock = this->writeGlvS1(psol,iStep,nBlock,time, pvecName,idBlock,psolComps); if (idBlock < 0) @@ -526,6 +552,9 @@ int SIMoutput::writeGlvS1 (const Vector& psol, int iStep, int& nBlock, double time, const char* pvecName, int idBlock, int psolComps, bool scalarOnly) { + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + if (psol.empty()) return 0; else if (!myVtf) @@ -680,6 +709,9 @@ int SIMoutput::writeGlvS1 (const Vector& psol, int iStep, int& nBlock, bool SIMoutput::writeGlvS2 (const Vector& psol, int iStep, int& nBlock, double time, int idBlock, int psolComps) { + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + if (psol.empty()) return true; // No primary solution else if (!myVtf || !myProblem) @@ -860,6 +892,9 @@ bool SIMoutput::writeGlvP (const Vector& ssol, int iStep, int& nBlock, int idBlock, const char* prefix, std::vector* maxVal) { + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + if (ssol.empty()) return true; else if (!myVtf) @@ -984,6 +1019,9 @@ bool SIMoutput::evalProjSolution (const Vector& ssol, bool SIMoutput::writeGlvF (const RealFunc& f, const char* fname, int iStep, int& nBlock, int idBlock, double time) { + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + if (!myVtf) return false; IntVec sID; @@ -1009,6 +1047,9 @@ bool SIMoutput::writeGlvF (const RealFunc& f, const char* fname, bool SIMoutput::writeGlvStep (int iStep, double value, int itype) { + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + myVtf->writeGeometryBlocks(iStep); if (itype == 0) @@ -1020,6 +1061,9 @@ bool SIMoutput::writeGlvStep (int iStep, double value, int itype) bool SIMoutput::writeGlvM (const Mode& mode, bool freq, int& nBlock) { + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + if (mode.eigVec.empty()) return true; else if (!myVtf) @@ -1067,6 +1111,9 @@ bool SIMoutput::writeGlvM (const Mode& mode, bool freq, int& nBlock) bool SIMoutput::writeGlvN (const Matrix& norms, int iStep, int& nBlock, const std::vector& prefix, int idBlock) { + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + if (norms.empty()) return true; else if (!myVtf) @@ -1139,6 +1186,9 @@ bool SIMoutput::writeGlvN (const Matrix& norms, int iStep, int& nBlock, bool SIMoutput::writeGlvE (const Vector& vec, int iStep, int& nBlock, const char* name) { + if (adm.dd.isPartitioned() && adm.getProcId() != 0) + return true; + if (!myVtf) return false; diff --git a/src/Utility/HDF5Writer.C b/src/Utility/HDF5Writer.C index 7347879d..6cfe647e 100644 --- a/src/Utility/HDF5Writer.C +++ b/src/Utility/HDF5Writer.C @@ -309,6 +309,8 @@ void HDF5Writer::writeSIM (int level, const DataEntry& entry, size_t projOfs = 0; for (int i = 0; i < sim->getNoPatches(); ++i) { int loc = sim->getLocalPatchIndex(i+1); + if (sim->getProcessAdm().dd.isPartitioned() && sim->getProcessAdm().getProcId() != 0) + loc = 0; if (loc > 0 && (!(results & DataExporter::REDUNDANT) || sim->getGlobalProcessID() == 0)) // we own the patch {