added: support for partitioning-based parallel computations

Automatically partition the mesh and distribute it in parallel.
This is rather suboptimal in many ways, but still useful.
Arne Morten Kvarving 2019-04-09 17:37:45 +02:00
parent 41ce8928de
commit 4b35c00f0d
22 changed files with 733 additions and 257 deletions
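In outline, the new flow (a sketch assembled from the hunks below; the exact
call order is inferred, not verbatim code):

// 1. SIMbase::preprocess(): when no explicit partitioning is given and more
//    than one process is running, DomainDecomposition::setup() falls back to
//    graphPartition().
// 2. graphPartition() hands the element connectivity graph to Zoltan and
//    stores the elements assigned to this rank in myElms.
// 3. Each patch restricts its assembly loops to the partition via
//    generateThreadGroupsFromElms(adm.dd.getElms()).
// 4. calcGlobalEqNumbersPart() numbers the global equations rank by rank and
//    broadcasts the final mapping, which the PETSc matrix/vector classes use
//    for assembly and for scattering the solution back.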

View File

@@ -425,6 +425,8 @@ public:
virtual void generateThreadGroups(const Integrand&, bool, bool) {}
//! \brief Generates element groups for multi-threading of boundary integrals.
virtual void generateThreadGroups(char, bool, bool) {}
//! \brief Generates element groups for multi-threading based on a partition.
virtual void generateThreadGroupsFromElms(const std::vector<int>&) {}
// Methods for integration of finite element quantities.
@@ -839,6 +841,7 @@ protected:
IntVec myMLGE; //!< The actual Matrix of Local to Global Element numbers
IntVec myMLGN; //!< The actual Matrix of Local to Global Node numbers
IntMat myMNPC; //!< The actual Matrix of Nodal Point Correspondence
IntVec myElms; //!< Elements on patch - used with partitioning
//! \brief Numerical integration scheme for this patch.
//! \details A value in the range [1,10] means use that number of Gauss

View File

@@ -2318,6 +2318,10 @@ bool ASMs2D::integrate (Integrand& integrand, int lIndex,
fe.iel = abs(MLGE[doXelms+iel-1]);
if (fe.iel < 1) continue; // zero-area element
if (!myElms.empty() && !glInt.threadSafe() &&
std::find(myElms.begin(), myElms.end(), iel-1) == myElms.end())
continue;
#ifdef SP_DEBUG
if (dbgElm < 0 && iel != -dbgElm)
continue; // Skipping all elements, except for -dbgElm
@@ -3119,3 +3123,14 @@ void ASMs2D::getBoundaryElms (int lIndex, int, IntVec& elms) const
elms.push_back(MLGE[i + (lIndex-3)*N1m*(N2m-1)] - 1);
}
}
void ASMs2D::generateThreadGroupsFromElms(const std::vector<int>& elms)
{
myElms.clear();
for (int elm : elms)
if (this->getElmIndex(elm+1) > 0)
myElms.push_back(this->getElmIndex(elm+1) - 1);
threadGroups = threadGroups.filter(myElms);
}
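The same pattern recurs for the 3D and LR patch types below: each global
element number in the partition is mapped to a patch-local index through
getElmIndex, and the precomputed thread groups are reduced to those elements.
A minimal sketch of the filtering this relies on, assuming ThreadGroups::filter
keeps only the listed elements in each stripe (the real implementation lives in
ThreadGroups.h and is not part of this diff):

#include <algorithm>
#include <vector>

// Hypothetical stand-in for what filter() does to one stripe of elements:
std::vector<int> filterStripe(const std::vector<int>& stripe,
                              const std::vector<int>& keep)
{
  std::vector<int> out;
  for (int e : stripe) // retain only the elements present in 'keep'
    if (std::find(keep.begin(), keep.end(), e) != keep.end())
      out.push_back(e);
  return out;
}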

View File

@@ -588,6 +588,9 @@ protected:
void generateThreadGroups(size_t strip1, size_t strip2,
bool silence, bool ignoreGlobalLM);
//! \brief Generates element groups from a partition.
virtual void generateThreadGroupsFromElms(const std::vector<int>& elms);
public:
//! \brief Auxiliary function for computation of basis function indices.
static void scatterInd(int n1, int n2, int p1, int p2,

View File

@@ -528,13 +528,17 @@ bool ASMs2Dmx::integrate (Integrand& integrand,
const int n1 = surf->numCoefs_u();
const int nel1 = n1 - p1 + 1;
ThreadGroups oneGroup;
if (glInt.threadSafe()) oneGroup.oneStripe(nel);
const ThreadGroups& groups = glInt.threadSafe() ? oneGroup : threadGroups;
// === Assembly loop over all elements in the patch ==========================
bool ok=true;
for (size_t g=0;g<threadGroups.size() && ok;++g) {
for (size_t g = 0; g < groups.size() && ok; g++) {
#pragma omp parallel for schedule(static)
for (size_t t=0;t<threadGroups[g].size();++t) {
for (size_t t = 0; t < groups[g].size(); t++) {
MxFiniteElement fe(elem_size);
std::vector<Matrix> dNxdu(m_basis.size());
std::vector<Matrix3D> d2Nxdu2(m_basis.size());
@@ -543,9 +547,9 @@ bool ASMs2Dmx::integrate (Integrand& integrand,
Matrix Xnod, Jac;
double param[3] = { 0.0, 0.0, 0.0 };
Vec4 X(param);
for (size_t i = 0; i < threadGroups[g][t].size() && ok; ++i)
for (size_t i = 0; i < groups[g][t].size() && ok; ++i)
{
int iel = threadGroups[g][t][i];
int iel = groups[g][t][i];
fe.iel = MLGE[iel];
if (fe.iel < 1) continue; // zero-area element
@@ -737,6 +741,10 @@ bool ASMs2Dmx::integrate (Integrand& integrand, int lIndex,
fe.iel = MLGE[iel-1];
if (fe.iel < 1) continue; // zero-area element
if (!myElms.empty() && !glInt.threadSafe() &&
std::find(myElms.begin(), myElms.end(), iel-1) == myElms.end())
continue;
// Skip elements that are not on current boundary edge
bool skipMe = false;
switch (edgeDir)
@@ -866,6 +874,10 @@ bool ASMs2Dmx::integrate (Integrand& integrand,
fe.iel = abs(MLGE[iel]);
if (fe.iel < 1) continue; // zero-area element
if (!myElms.empty() && !glInt.threadSafe() &&
std::find(myElms.begin(), myElms.end(), iel-1) == myElms.end())
continue;
short int status = iChk.hasContribution(iel,i1,i2);
if (!status) continue; // no interface contributions for this element

View File

@@ -2706,6 +2706,10 @@ bool ASMs3D::integrateEdge (Integrand& integrand, int lEdge,
fe.iel = MLGE[iel-1];
if (fe.iel < 1) continue; // zero-volume element
if (!myElms.empty() &&
std::find(myElms.begin(), myElms.end(), fe.iel-1) == myElms.end())
continue;
// Skip elements that are not on current boundary edge
bool skipMe = false;
switch (lEdge)
@@ -3533,3 +3537,17 @@ void ASMs3D::getBoundaryElms (int lIndex, int, IntVec& elms) const
elms.push_back(MLGE[i + j*N1m + (lIndex-5)*(N1m*N2m*(N3m-1))] - 1);
}
}
void ASMs3D::generateThreadGroupsFromElms(const std::vector<int>& elms)
{
myElms.clear();
for (int elm : elms)
if (this->getElmIndex(elm+1) > 0)
myElms.push_back(this->getElmIndex(elm+1)-1);
threadGroupsVol = threadGroupsVol.filter(myElms);
for (auto& group : threadGroupsFace)
group.second = group.second.filter(myElms);
}

View File

@@ -653,6 +653,9 @@ protected:
//! \param[in] silence If \e true, suppress threading group printout
virtual void generateThreadGroups(char lIndex, bool silence, bool);
//! \brief Generates element groups from a partition.
virtual void generateThreadGroupsFromElms(const std::vector<int>& elms);
public:
//! \brief Auxiliary function for computation of basis function indices.
static void scatterInd(int n1, int n2, int n3, int p1, int p2, int p3,

View File

@@ -483,12 +483,17 @@ bool ASMs3Dmx::integrate (Integrand& integrand,
const int nel1 = n1 - p1 + 1;
const int nel2 = n2 - p2 + 1;
ThreadGroups oneGroup;
if (glInt.threadSafe()) oneGroup.oneStripe(nel);
const ThreadGroups& groups = glInt.threadSafe() ? oneGroup : threadGroupsVol;
// === Assembly loop over all elements in the patch ==========================
bool ok=true;
for (size_t g=0;g<threadGroupsVol.size() && ok;++g) {
bool ok = true;
for (size_t g = 0; g < groups.size() && ok; ++g) {
#pragma omp parallel for schedule(static)
for (size_t t=0;t<threadGroupsVol[g].size();++t) {
for (size_t t = 0; t < groups[g].size(); ++t) {
MxFiniteElement fe(elem_size);
std::vector<Matrix> dNxdu(m_basis.size());
std::vector<Matrix3D> d2Nxdu2(m_basis.size());
@@ -497,9 +502,9 @@ bool ASMs3Dmx::integrate (Integrand& integrand,
Matrix Xnod, Jac;
double param[3];
Vec4 X(param);
for (size_t l = 0; l < threadGroupsVol[g][t].size() && ok; ++l)
for (size_t l = 0; l < groups[g][t].size() && ok; ++l)
{
int iel = threadGroupsVol[g][t][l];
int iel = groups[g][t][l];
fe.iel = MLGE[iel];
if (fe.iel < 1) continue; // zero-volume element

View File

@@ -18,6 +18,7 @@
#include "ASMunstruct.h"
#include "LinSolParams.h"
#include "ProcessAdm.h"
#include "Profiler.h"
#include "SAMpatch.h"
#include "SIMbase.h"
#include "Utilities.h"
@@ -26,6 +27,14 @@
#include <functional>
#include <numeric>
#ifdef HAS_ZOLTAN
#define OLD_MPI HAVE_MPI
#undef HAVE_MPI
#include <zoltan.h>
#undef HAVE_MPI
#define HAVE_MPI OLD_MPI
#endif
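The save/undef/restore dance around the zoltan.h include presumably keeps
Zoltan's configuration headers from clashing with IFEM's own HAVE_MPI
definition. Note that the restore re-defines HAVE_MPI as the token OLD_MPI
rather than as its original value; that is sufficient for the #ifdef HAVE_MPI
tests used in this file, but would not survive a value comparison.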
DomainDecomposition::
OrientIterator::OrientIterator(const ASMbase* pch,
@@ -170,6 +179,95 @@ OrientIterator::OrientIterator(const ASMbase* pch, int orient, int lIdx)
}
#ifdef HAS_ZOLTAN
static int getNumElements(void* mesh, int* err)
{
*err = ZOLTAN_OK;
IntMat& neigh = *static_cast<IntMat*>(mesh);
return neigh.size();
}
static void getElementList(void* mesh, int numGlobalEntries,
int, ZOLTAN_ID_PTR gids,
ZOLTAN_ID_PTR lids, int,
float*, int* err)
{
IntMat& neigh = *static_cast<IntMat*>(mesh);
std::iota(gids, gids+neigh.size(), 0);
std::iota(lids, lids+neigh.size(), 0);
*err = ZOLTAN_OK;
}
static void getNumEdges(void* mesh, int sizeGID, int sizeLID,
int numCells, ZOLTAN_ID_PTR globalID,
ZOLTAN_ID_PTR localID, int* numEdges, int* err)
{
IntMat& neigh = *static_cast<IntMat*>(mesh);
int* ne = numEdges;
for (const std::vector<int>& n : neigh)
*ne++ = std::accumulate(n.begin(), n.end(), 0,
[](const int& a, const int& b)
{
return b != -1 ? a + 1 : a;
});
*err = ZOLTAN_OK;
}
static void getEdges(void* mesh, int sizeGID, int sizeLID, int numCells,
ZOLTAN_ID_PTR globalID, ZOLTAN_ID_PTR localID,
int* numEdges, ZOLTAN_ID_PTR nborGID, int* nborProc,
int wgtDim, float* egts, int* err)
{
IntMat& neigh = *static_cast<IntMat*>(mesh);
memset(nborProc, 0, numCells*sizeof(int));
for (const std::vector<int>& elm : neigh)
for (int n : elm)
if (n != -1)
*nborGID++ = n;
*err = ZOLTAN_OK;
}
static int getNullElements(void*, int* err)
{
*err = ZOLTAN_OK;
return 0;
}
static void getNullList(void*, int,
int, ZOLTAN_ID_PTR,
ZOLTAN_ID_PTR, int,
float*, int* err)
{
*err = ZOLTAN_OK;
}
static void getNumNullEdges(void*, int, int, int,
ZOLTAN_ID_PTR, ZOLTAN_ID_PTR,
int*, int* err)
{
*err = ZOLTAN_OK;
}
static void getNullEdges(void*, int, int, int,
ZOLTAN_ID_PTR, ZOLTAN_ID_PTR,
int*, ZOLTAN_ID_PTR, int*,
int, float*, int* err)
{
*err = ZOLTAN_OK;
}
#endif
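These callbacks assume getElmConnectivities() returns one row per element with
one entry per face neighbour and -1 marking an exterior face: getNumEdges
counts the non-negative entries and getEdges flattens them into Zoltan's edge
list. For illustration only (the neighbour ordering is an assumption), a 2x2
structured quad mesh would look like:

// Element numbering:  2 3   (row e lists {left, right, down, up};
//                     0 1    -1 marks an exterior face)
IntMat neigh = {{-1,  1, -1,  2},   // element 0
                { 0, -1, -1,  3},   // element 1
                {-1,  3,  0, -1},   // element 2
                { 2, -1,  1, -1}};  // element 3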
std::vector<std::set<int>> DomainDecomposition::getSubdomains(int nx, int ny, int nz,
int overlap, size_t block) const
{
@@ -811,6 +909,47 @@ bool DomainDecomposition::calcGlobalEqNumbers(const ProcessAdm& adm,
}
bool DomainDecomposition::calcGlobalEqNumbersPart(const ProcessAdm& adm,
const SIMbase& sim)
{
#ifdef HAVE_MPI
blocks[0].MLGEQ.clear();
blocks[0].MLGEQ.resize(sim.getSAM()->getNoEquations(), -1);
blocks[0].minEq = 1;
blocks[0].maxEq = 0;
if (adm.getProcId() != 0) {
adm.receive(blocks[0].MLGEQ, adm.getProcId()-1);
adm.receive(blocks[0].minEq, adm.getProcId()-1);
blocks[0].maxEq = blocks[0].minEq;
blocks[0].minEq++;
}
for (size_t i = 0; i < myElms.size(); ++i) {
IntVec meen;
sim.getSAM()->getElmEqns(meen, myElms[i]+1);
for (int eq : meen)
if (eq > 0 && blocks[0].MLGEQ[eq-1] == -1)
blocks[0].MLGEQ[eq-1] = ++blocks[0].maxEq;
}
if (adm.getProcId() < adm.getNoProcs()-1) {
adm.send(blocks[0].MLGEQ, adm.getProcId()+1);
adm.send(blocks[0].maxEq, adm.getProcId()+1);
}
MPI_Bcast(&blocks[0].MLGEQ[0], blocks[0].MLGEQ.size(), MPI_INT,
adm.getNoProcs()-1, *adm.getCommunicator());
for (size_t i = 0; i < blocks[0].MLGEQ.size(); ++i)
blocks[0].G2LEQ[blocks[0].MLGEQ[i]] = i+1;
#endif
return true;
}
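The numbering is a sequential sweep: rank p waits for rank p-1's mapping,
numbers the still-unnumbered equations touched by its own elements, and passes
the result on; the last rank broadcasts the completed mapping so every rank
can invert it into the global-to-local map G2LEQ. A worked example with two
ranks and five global equations (illustrative numbers only):

// rank 0 touches eqs {1,2,3}:  MLGEQ = {1,2,3,-1,-1}, maxEq = 3
// rank 0 sends MLGEQ and maxEq to rank 1
// rank 1 receives minEq = 3, touches eqs {3,4,5}; eq 3 is already numbered,
//   so it assigns MLGEQ = {1,2,3,4,5}, maxEq = 5
// rank 1 (the last rank) broadcasts MLGEQ via MPI_Bcast; every rank then
//   builds G2LEQ from it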
int DomainDecomposition::getGlobalEq(int lEq, size_t idx) const
{
if (lEq < 1 || idx >= blocks.size())
@@ -933,10 +1072,17 @@ bool DomainDecomposition::setup(const ProcessAdm& adm, const SIMbase& sim)
#endif
#endif
if (ghostConnections.empty() && adm.getNoProcs() > 1 && myElms.empty())
if (!this->graphPartition(adm, sim))
return false;
sam = dynamic_cast<const SAMpatch*>(sim.getSAM());
int ok = 1;
#ifdef HAVE_MPI
int lok = ok;
#endif
if (myElms.empty()) {
// Establish global node numbers
if (!calcGlobalNodeNumbers(adm, sim))
ok = 0;
@@ -949,7 +1095,6 @@ bool DomainDecomposition::setup(const ProcessAdm& adm, const SIMbase& sim)
}
#ifdef HAVE_MPI
int lok = ok;
MPI_Allreduce(&lok, &ok, 1, MPI_INT, MPI_SUM, *adm.getCommunicator());
#endif
@@ -963,6 +1108,7 @@ bool DomainDecomposition::setup(const ProcessAdm& adm, const SIMbase& sim)
#ifdef HAVE_MPI
ok = 1;
#endif
}
// Establish local equation mappings for each block.
if (sim.getSolParams() && sim.getSolParams()->getNoBlocks() > 1) {
@@ -1011,7 +1157,10 @@ bool DomainDecomposition::setup(const ProcessAdm& adm, const SIMbase& sim)
}
// Establish global equation numbers for all blocks.
if (!calcGlobalEqNumbers(adm, sim))
if (!myElms.empty()) {
if (!calcGlobalEqNumbersPart(adm, sim))
return false;
} else if (!calcGlobalEqNumbers(adm, sim))
#ifdef HAVE_MPI
ok = 0;
#else
@@ -1072,3 +1221,91 @@ int DomainDecomposition::getPatchOwner(size_t p) const
return it->second;
}
bool DomainDecomposition::graphPartition(const ProcessAdm& adm, const SIMbase& sim)
{
PROFILE("Mesh partition");
myElms.clear();
#ifdef HAS_ZOLTAN
static bool inited = false;
if (!inited)
{
float ver;
Zoltan_Initialize(0, nullptr, &ver);
inited = true;
}
struct Zoltan_Struct* zz = Zoltan_Create(*adm.getCommunicator());
IntMat neigh;
if (adm.getProcId() == 0) {
neigh = sim.getElmConnectivities();
Zoltan_Set_Num_Obj_Fn(zz, getNumElements, &neigh);
Zoltan_Set_Obj_List_Fn(zz, getElementList, &neigh);
Zoltan_Set_Num_Edges_Multi_Fn(zz, getNumEdges, &neigh);
Zoltan_Set_Edge_List_Multi_Fn(zz, getEdges, &neigh);
} else {
Zoltan_Set_Num_Obj_Fn(zz, getNullElements, nullptr);
Zoltan_Set_Obj_List_Fn(zz, getNullList, nullptr);
Zoltan_Set_Num_Edges_Multi_Fn(zz, getNumNullEdges, nullptr);
Zoltan_Set_Edge_List_Multi_Fn(zz, getNullEdges, nullptr);
}
Zoltan_Set_Param(zz, "DEBUG_LEVEL", "0");
Zoltan_Set_Param(zz, "LB_METHOD", "GRAPH");
Zoltan_Set_Param(zz, "GRAPH_PACKAGE", "Scotch");
Zoltan_Set_Param(zz, "LB_APPROACH", "PARTITION");
Zoltan_Set_Param(zz, "NUM_GID_ENTRIES", "1");
Zoltan_Set_Param(zz, "NUM_LID_ENTRIES", "1");
Zoltan_Set_Param(zz, "RETURN_LISTS", "ALL");
Zoltan_Set_Param(zz, "CHECK_GRAPH", "2");
Zoltan_Set_Param(zz,"EDGE_WEIGHT_DIM","0");
Zoltan_Set_Param(zz, "OBJ_WEIGHT_DIM", "0");
Zoltan_Set_Param(zz, "PHG_EDGE_SIZE_THRESHOLD", ".35"); /* 0-remove all, 1-remove none */
int changes, numGidEntries, numLidEntries, numImport, numExport;
ZOLTAN_ID_PTR importGlobalGids, importLocalGids, exportGlobalGids, exportLocalGids;
int* importProcs, *importToPart, *exportProcs, *exportToPart;
Zoltan_LB_Partition(zz, /* input (all remaining fields are output) */
&changes, /* 1 if partitioning was changed, 0 otherwise */
&numGidEntries, /* Number of integers used for a global ID */
&numLidEntries, /* Number of integers used for a local ID */
&numImport, /* Number of vertices to be sent to me */
&importGlobalGids, /* Global IDs of vertices to be sent to me */
&importLocalGids, /* Local IDs of vertices to be sent to me */
&importProcs, /* Process rank for source of each incoming vertex */
&importToPart, /* New partition for each incoming vertex */
&numExport, /* Number of vertices I must send to other processes*/
&exportGlobalGids, /* Global IDs of the vertices I must send */
&exportLocalGids, /* Local IDs of the vertices I must send */
&exportProcs, /* Process to which I send each of the vertices */
&exportToPart); /* Partition to which each vertex will belong */
if (sim.getProcessAdm().getProcId() == 0) {
std::vector<std::vector<int>> toExp(adm.getNoProcs());
std::vector<bool> offProc(sim.getNoElms(), false);
for (int i = 0; i < numExport; ++i) {
int gid = exportGlobalGids[i];
offProc[gid] = true;
}
myElms.reserve(numExport);
for (size_t i = 0; i < sim.getNoElms(); ++i)
if (!offProc[i])
myElms.push_back(i);
}
else {
myElms.resize(numImport);
std::copy(importGlobalGids, importGlobalGids+numImport, myElms.begin());
}
Zoltan_LB_Free_Part(&importGlobalGids, &importLocalGids, &importProcs, &importToPart);
Zoltan_LB_Free_Part(&exportGlobalGids, &exportLocalGids, &exportProcs, &exportToPart);
Zoltan_Destroy(&zz);
#else
std::cerr << "*** DomainDecompositon::graphPartition: Compiled without Zoltan support. No partitioning available." << std::endl;
return false;
#endif
if (!myElms.empty())
IFEM::cout << "Graph partitioning: " << myElms.size() << " elements in partition." << std::endl;
partitioned = !myElms.empty();
return partitioned;
}
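Note the asymmetry in reading the result back: with RETURN_LISTS set to "ALL",
Zoltan returns both import and export lists. Rank 0 supplied every vertex, so
its partition is "everything not exported", whereas the other ranks, which
registered the null callbacks and supplied nothing, receive their elements
entirely through the import lists.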

View File

@@ -174,6 +174,12 @@ public:
//! \brief Returns number of matrix blocks.
size_t getNoBlocks() const { return blocks.size()-1; }
//! \brief Returns whether a graph-based partition is used.
bool isPartitioned() const { return partitioned; }
//! \brief Returns the elements in the partition.
const std::vector<int>& getElms() const { return myElms; }
private:
//! \brief Calculates a 1D partitioning with a given overlap.
//! \param[in] nel1 Number of knot-spans in first parameter direction.
@@ -237,11 +243,17 @@ private:
//! \brief Calculates the global equation numbers for the given finite element model.
bool calcGlobalEqNumbers(const ProcessAdm& adm, const SIMbase& sim);
//! \brief Calculates the global equation numbers for a partitioned model.
bool calcGlobalEqNumbersPart(const ProcessAdm& adm, const SIMbase& sim);
//! \brief Sanity checks the model.
//! \details Collects the corners of all patches in the model and makes sure
//! nodes with matching coordinates have the same global node ID.
bool sanityCheckCorners(const SIMbase& sim);
//! \brief Sets up the domain decomposition based on graph partitioning.
bool graphPartition(const ProcessAdm& adm, const SIMbase& sim);
std::map<int,int> patchOwner; //!< Process that owns a particular patch
//! \brief Struct with information per matrix block.
@@ -258,12 +270,14 @@ private:
std::vector<int> MLGN; //!< Process-local-to-global node numbers
std::vector<BlockInfo> blocks; //!< Equation mappings for all matrix blocks.
std::vector<int> myElms; //!< Elements in partition
int minDof = 0; //!< First DOF we own
int maxDof = 0; //!< Last DOF we own
int minNode = 0; //!< First node we own
int maxNode = 0; //!< Last node we own
const SAMpatch* sam = nullptr; //!< The assembly handler the DD is constructed for.
bool partitioned = false; //!< \e true if a graph-based decomposition is used
};
#endif

View File

@@ -1574,6 +1574,12 @@ bool ASMu2D::integrate (Integrand& integrand, int lIndex,
int iel = 0;
for (const LR::Element* el : lrspline->getAllElements())
{
if (!myElms.empty() && !glInt.threadSafe() &&
std::find(myElms.begin(), myElms.end(), iel) == myElms.end()) {
++iel;
continue;
}
fe.iel = MLGE[iel++];
#ifdef SP_DEBUG
if (dbgElm < 0 && iel != -dbgElm)
@@ -2705,3 +2711,14 @@ void ASMu2D::getBoundaryElms (int lIndex, int orient, IntVec& elms) const
for (const LR::Element* elem : elements)
elms.push_back(MLGE[elem->getId()]-1);
}
void ASMu2D::generateThreadGroupsFromElms(const std::vector<int>& elms)
{
myElms.clear();
for (int elm : elms)
if (this->getElmIndex(elm+1) > 0)
myElms.push_back(this->getElmIndex(elm+1)-1);
threadGroups = threadGroups.filter(myElms);
}

View File

@@ -591,6 +591,9 @@ protected:
void generateThreadGroups(const Integrand& integrand, bool silence,
bool ignoreGlobalLM);
//! \brief Generates element groups from a partition.
virtual void generateThreadGroupsFromElms(const std::vector<int>& elms);
//! \brief Remap element wise errors to basis functions.
//! \param errors The remapped errors
//! \param[in] origErr The element wise errors on the geometry mesh

View File

@@ -301,16 +301,21 @@ bool ASMu2Dmx::integrate (Integrand& integrand,
if (!xg || !wg) return false;
bool use2ndDer = integrand.getIntegrandType() & Integrand::SECOND_DERIVATIVES;
ThreadGroups oneGroup;
if (glInt.threadSafe()) oneGroup.oneGroup(nel);
const IntMat& groups = glInt.threadSafe() ? oneGroup[0] : threadGroups[0];
// === Assembly loop over all elements in the patch ==========================
bool ok = true;
for (size_t t = 0; t < threadGroups[0].size() && ok; ++t)
for (size_t t = 0; t < groups.size() && ok; ++t)
{
//#pragma omp parallel for schedule(static)
for (size_t e = 0; e < threadGroups[0][t].size(); ++e)
for (size_t e = 0; e < groups[t].size(); ++e)
{
if (!ok)
continue;
int iel = threadGroups[0][t][e] + 1;
int iel = groups[t][e] + 1;
auto el1 = threadBasis->elementBegin()+iel-1;
double uh = ((*el1)->umin()+(*el1)->umax())/2.0;
double vh = ((*el1)->vmin()+(*el1)->vmax())/2.0;
@@ -524,6 +529,12 @@ bool ASMu2Dmx::integrate (Integrand& integrand, int lIndex,
}
if (skipMe) continue;
if (!myElms.empty() && !glInt.threadSafe() &&
std::find(myElms.begin(), myElms.end(), iel-1) == myElms.end()) {
++iel;
continue;
}
double uh = ((*el1)->umin()+(*el1)->umax())/2.0;
double vh = ((*el1)->vmin()+(*el1)->vmax())/2.0;
std::vector<size_t> els;
@@ -644,6 +655,12 @@ bool ASMu2Dmx::integrate (Integrand& integrand,
for (int iel = 1; el1 != m_basis[0]->elementEnd(); ++el1, ++iel) {
short int status = iChk.hasContribution(iel);
if (!status) continue; // no interface contributions for this element
if (!myElms.empty() && !glInt.threadSafe() &&
std::find(myElms.begin(), myElms.end(), iel-1) == myElms.end()) {
++iel, ++el1;
continue;
}
// first push the hosting elements
std::vector<size_t> els(1,iel);
std::vector<size_t> elem_sizes(1,(*el1)->nBasisFunctions());

View File

@@ -1209,6 +1209,9 @@ bool ASMu3D::integrate (Integrand& integrand, int lIndex,
if (dbgElm < 0 && iEl+1 != -dbgElm)
continue; // Skipping all elements, except for -dbgElm
#endif
if (!myElms.empty() && !glInt.threadSafe() &&
std::find(myElms.begin(), myElms.end(), iEl) == myElms.end())
continue;
FiniteElement fe;
fe.iel = MLGE[iEl];
@@ -2324,3 +2327,14 @@ void ASMu3D::getBoundaryElms (int lIndex, int orient, IntVec& elms) const
for (const LR::Element* elem : elements)
elms.push_back(MLGE[elem->getId()]-1);
}
void ASMu3D::generateThreadGroupsFromElms(const std::vector<int>& elms)
{
myElms.clear();
for (int elm : elms)
if (this->getElmIndex(elm+1) > 0)
myElms.push_back(this->getElmIndex(elm+1)-1);
threadGroups = threadGroups.filter(myElms);
}

View File

@@ -553,6 +553,9 @@ protected:
void generateThreadGroups(const Integrand& integrand, bool silence,
bool ignoreGlobalLM);
//! \brief Generates element groups from a partition.
virtual void generateThreadGroupsFromElms(const std::vector<int>& elms);
//! \brief Remap element wise errors to basis functions.
//! \param errors The remapped errors
//! \param[in] origErr The element wise errors on the geometry mesh
@@ -587,6 +590,7 @@ protected:
Matrices myBezierExtract; //!< Bezier extraction matrices
ThreadGroups threadGroups; //!< Element groups for multi-threaded assembly
IntVec myElms; //!< Elements on patch - used with partitioning
mutable double vMin; //!< Minimum element volume for adaptive refinement
};

View File

@@ -360,11 +360,20 @@ bool ASMu3Dmx::integrate (Integrand& integrand,
else if (nRed < 0)
nRed = nGauss; // The integrand needs to know nGauss
ThreadGroups oneGroup;
if (glInt.threadSafe()) oneGroup.oneGroup(nel);
const IntMat& group = glInt.threadSafe() ? oneGroup[0] : threadGroups[0];
// === Assembly loop over all elements in the patch ==========================
bool ok=true;
for(LR::Element *el : this->getBasis(geoBasis)->getAllElements()) {
bool ok = true;
for (size_t t = 0; t < group.size() && ok; t++)
//#pragma omp parallel for schedule(static)
for (size_t e = 0; e < group[t].size(); e++)
{
int iel = group[t][e] + 1;
const LR::Element* el = lrspline->getElement(iel-1);
double uh = (el->umin()+el->umax())/2.0;
double vh = (el->vmin()+el->vmax())/2.0;
double wh = (el->wmin()+el->wmax())/2.0;

View File

@@ -54,6 +54,9 @@ Real SAMpatchPETSc::dot (const Vector& x, const Vector& y, char dofType) const
{
#ifdef HAVE_MPI
if (adm.isParallel()) {
if (adm.dd.isPartitioned())
return this->SAMpatch::dot(x,y,dofType);
DofIS& dis = this->getIS(dofType);
Vec lx, ly;
@@ -94,7 +97,7 @@ Real SAMpatchPETSc::dot (const Vector& x, const Vector& y, char dofType) const
Real SAMpatchPETSc::normL2 (const Vector& x, char dofType) const
{
#ifdef HAVE_MPI
if (adm.isParallel()) {
if (adm.isParallel() && !adm.dd.isPartitioned()) {
DofIS& dis = this->getIS(dofType);
Vec lx;
@@ -130,7 +133,7 @@ Real SAMpatchPETSc::normInf (const Vector& x, size_t& comp, char dofType) const
{
Real max = this->SAM::normInf(x,comp,dofType);
#ifdef HAVE_MPI
if (adm.isParallel())
if (adm.isParallel() && !adm.dd.isPartitioned())
max = adm.allReduce(max, MPI_MAX);
// TODO: comp (node index of the max value) is not necessarily correct here
#endif
@@ -187,23 +190,33 @@ bool SAMpatchPETSc::expandSolution(const SystemVector& solVec,
#ifdef HAVE_MPI
if (adm.isParallel()) {
if (!glob2LocEq) {
if (!glob2LocEq && !adm.dd.isPartitioned()) {
IntVec mlgeq(adm.dd.getMLGEQ());
for (int& ieq : mlgeq) --ieq;
ISCreateGeneral(*adm.getCommunicator(),adm.dd.getMLGEQ().size(),
mlgeq.data(), PETSC_COPY_VALUES, &glob2LocEq);
}
Vec solution;
VecCreateSeq(PETSC_COMM_SELF, Bptr->dim(), &solution);
VecScatter ctx;
Vec solution;
if (adm.dd.isPartitioned())
VecScatterCreateToAll(Bptr->getVector(), &ctx, &solution);
else {
VecCreateSeq(PETSC_COMM_SELF, Bptr->dim(), &solution);
VecScatterCreate(Bptr->getVector(), glob2LocEq, solution, nullptr, &ctx);
}
VecScatterBegin(ctx, Bptr->getVector(), solution, INSERT_VALUES, SCATTER_FORWARD);
VecScatterEnd(ctx, Bptr->getVector(),solution,INSERT_VALUES,SCATTER_FORWARD);
VecScatterDestroy(&ctx);
PetscScalar* data;
VecGetArray(solution, &data);
if (adm.dd.isPartitioned()) {
for (const auto& it : adm.dd.getG2LEQ(0))
(*Bptr)(it.second) = data[it.first-1];
} else
std::copy(data, data + Bptr->dim(), Bptr->getPtr());
VecDestroy(&solution);
} else
#endif
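For a partitioned model the process-local equations are not a contiguous slice
of the global vector, so the solution is gathered to every rank and remapped
through G2LEQ instead of going through the index-set scatter. The generic
PETSc pattern behind the new branch, as a minimal sketch (assuming x is a
parallel Vec):

Vec        all;  // sequential Vec that will hold every entry of x
VecScatter ctx;
VecScatterCreateToAll(x, &ctx, &all);  // gather the full vector on all ranks
VecScatterBegin(ctx, x, all, INSERT_VALUES, SCATTER_FORWARD);
VecScatterEnd(ctx, x, all, INSERT_VALUES, SCATTER_FORWARD);
VecScatterDestroy(&ctx);
// ... read the gathered values via VecGetArray(all, &data) ...
VecDestroy(&all);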

View File

@@ -93,7 +93,7 @@ int IFEM::Init (int arg_c, char** arg_v, const char* title)
#endif
std::cout <<"\n MPI support: ";
#if HAVE_MPI
#ifdef HAVE_MPI
std::cout <<"enabled";
#else
std::cout <<"disabled";

View File

@@ -91,8 +91,8 @@ void PETScVector::redim(size_t n)
bool PETScVector::beginAssembly()
{
for (size_t i = 0; i < size(); ++i)
VecSetValue(x , adm.dd.getGlobalEq(i+1)-1, (*this)[i], ADD_VALUES);
for (size_t i = 0; i < this->size(); ++i)
VecSetValue(x, adm.dd.getGlobalEq(i+1)-1, (*this)[i], ADD_VALUES);
VecAssemblyBegin(x);
return true;
@@ -180,31 +180,32 @@ PETScMatrix::~PETScMatrix ()
void PETScMatrix::initAssembly (const SAM& sam, bool delayLocking)
{
if (!adm.dd.isPartitioned()) {
SparseMatrix::initAssembly(sam, delayLocking);
SparseMatrix::preAssemble(sam, delayLocking);
} else
this->resize(sam.neq,sam.neq);
const SAMpatchPETSc* samp = dynamic_cast<const SAMpatchPETSc*>(&sam);
if (!samp)
return;
// Get number of local equations in linear system
const PetscInt neq = adm.dd.getMaxEq()- adm.dd.getMinEq() + 1;
PetscInt neq;
neq = adm.dd.getMaxEq() - adm.dd.getMinEq() + 1;
// Set correct number of rows and columns for matrix.
MatSetSizes(pA,neq,neq,PETSC_DECIDE,PETSC_DECIDE);
MatSetSizes(pA,neq,neq,PETSC_DETERMINE,PETSC_DETERMINE);
// Allocate sparsity pattern
std::vector<std::set<int>> dofc;
if (!adm.dd.isPartitioned())
sam.getDofCouplings(dofc);
if (matvec.empty()) {
// Set correct number of rows and columns for matrix.
MatSetSizes(pA,neq,neq,PETSC_DECIDE,PETSC_DECIDE);
MatSetFromOptions(pA);
// Allocate sparsity pattern
if (adm.isParallel()) {
if (adm.isParallel() && !adm.dd.isPartitioned()) {
int ifirst = adm.dd.getMinEq();
int ilast = adm.dd.getMaxEq();
PetscIntVec d_nnz(neq, 0);
@@ -235,6 +236,34 @@ void PETScMatrix::initAssembly (const SAM& sam, bool delayLocking)
MatMPIAIJSetPreallocation(pA,PETSC_DEFAULT,d_nnz.data(),
PETSC_DEFAULT,o_nnz.data());
} else if (adm.dd.isPartitioned()) {
// Setup sparsity pattern for global matrix
SparseMatrix* lA = new SparseMatrix(neq, sam.getNoEquations());
for (int elm : adm.dd.getElms()) {
IntVec meen;
sam.getElmEqns(meen,elm+1);
for (int i : meen)
if (i > 0 && i >= adm.dd.getMinEq(0) && i <= adm.dd.getMaxEq(0))
for (int j : meen)
if (j > 0)
(*lA)(i-adm.dd.getMinEq(0)+1,adm.dd.getGlobalEq(j)) = 0.0;
}
IntVec iA, jA;
SparseMatrix::calcCSR(iA, jA, neq, lA->getValues());
delete lA;
MatMPIAIJSetPreallocationCSR(pA, iA.data(), jA.data(), nullptr);
// Setup sparsity pattern for local matrix
for (int elm : adm.dd.getElms()) {
IntVec meen;
sam.getElmEqns(meen,elm+1);
for (int i : meen)
if (i > 0)
for (int j : meen)
if (j > 0)
(*this)(i,j) = 0.0;
}
this->optimiseSLU();
} else {
PetscIntVec Nnz;
for (const auto& it : dofc)
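MatMPIAIJSetPreallocationCSR expects the locally owned rows in CSR form:
iA[r]..iA[r+1] delimits row r's entries in jA, which holds global column
indices. A tiny illustration with made-up numbers, for a rank owning two rows
that couple to columns {1,2} and {2,3} (0-based here; whether calcCSR produces
0- or 1-based indices is not visible in this diff):

PetscInt iA[] = {0, 2, 4};        // row pointers into jA
PetscInt jA[] = {1, 2, 2, 3};     // global column indices
MatMPIAIJSetPreallocationCSR(pA, iA, jA, nullptr);  // values may be null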

View File

@@ -107,6 +107,7 @@ void SIMbase::clearProperties ()
myProps.clear();
myInts.clear();
mixedMADOFs.clear();
adm.dd.setElms({},"");
}
@@ -173,13 +174,6 @@ bool SIMbase::preprocess (const IntVec& ignored, bool fixDup)
return false;
}
if (nProc > 1 && myPatches.empty() && adm.isParallel())
{
std::cerr <<" *** SIMbase::preprocess: No partitioning information for "
<< nProc <<" processors found."<< std::endl;
return false;
}
PROFILE1("Model preprocessing");
static int substep = 10;
@@ -367,12 +361,23 @@ bool SIMbase::preprocess (const IntVec& ignored, bool fixDup)
if (!static_cast<SAMpatch*>(mySam)->init(myModel,ngnod,dofTypes))
return false;
if (nProc > 1 && myPatches.empty() && adm.isParallel())
{
IFEM::cout <<" *** SIMbase::preprocess: No partitioning information for "
<< nProc <<" processors found. Using graph partitioning\n";
}
if (!adm.dd.setup(adm,*this))
{
std::cerr <<"\n *** SIMbase::preprocess(): Failed to establish "
<<" domain decomposition data."<< std::endl;
return false;
}
else if (adm.dd.isPartitioned()) // restrict thread groups to the partition
{
for (ASMbase* pch : this->getFEModel())
pch->generateThreadGroupsFromElms(adm.dd.getElms());
}
if (!myProblem)
{
@@ -1488,6 +1493,7 @@ bool SIMbase::solutionNorms (const TimeDomain& time,
delete norm;
if (!adm.dd.isPartitioned())
for (k = 0; k < gNorm.size(); k++)
adm.allReduceAsSum(gNorm[k]);

View File

@@ -41,7 +41,9 @@ public:
virtual unsigned short int getNoParamDim() const { return 0; }
//! \brief Creates the computational FEM model from the spline patches.
virtual bool createFEMmodel(char) { return false; }
//! \brief Returns the element-element connectivities.
virtual std::vector<std::vector<int>> getElmConnectivities() const
{ return std::vector<std::vector<int>>(); }
protected:
//! \brief Preprocesses a user-defined Dirichlet boundary property.
virtual bool addConstraint(int,int,int,int,int,int&,char)

View File

@@ -286,6 +286,13 @@ bool SIMoutput::writeGlvG (int& nBlock, const char* inpFile, bool doClear)
// Open a new VTF-file
char* vtfName = new char[strlen(inpFile)+10];
strtok(strcpy(vtfName,inpFile),".");
if (adm.dd.isPartitioned()) {
if (adm.getProcId() == 0) {
strcat(vtfName,ext);
IFEM::cout <<"\nWriting VTF-file "<< vtfName << std::endl;
myVtf = new VTF(vtfName,opt.format);
}
} else {
if (nProc > 1)
sprintf(vtfName+strlen(vtfName),"_p%04d%s",myPid,ext);
else
@@ -293,11 +300,15 @@ bool SIMoutput::writeGlvG (int& nBlock, const char* inpFile, bool doClear)
IFEM::cout <<"\nWriting VTF-file "<< vtfName << std::endl;
myVtf = new VTF(vtfName,opt.format);
}
delete[] vtfName;
}
else if (doClear)
myVtf->clearGeometryBlocks();
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
ElementBlock* lvb;
char pname[32];
@@ -337,6 +348,9 @@ bool SIMoutput::writeGlvG (int& nBlock, const char* inpFile, bool doClear)
bool SIMoutput::writeGlvBC (int& nBlock, int iStep) const
{
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
if (!myVtf) return false;
Matrix field;
@@ -403,6 +417,9 @@ bool SIMoutput::writeGlvBC (int& nBlock, int iStep) const
bool SIMoutput::writeGlvT (int iStep, int& geoBlk, int& nBlock) const
{
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
if (myProblem->hasTractionValues())
{
if (msgLevel > 1)
@@ -417,6 +434,9 @@ bool SIMoutput::writeGlvT (int iStep, int& geoBlk, int& nBlock) const
bool SIMoutput::writeGlvV (const Vector& vec, const char* fieldName,
int iStep, int& nBlock, int idBlock, int ncmp) const
{
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
if (vec.empty())
return true;
else if (!myVtf)
@@ -459,6 +479,9 @@ bool SIMoutput::writeGlvV (const Vector& vec, const char* fieldName,
bool SIMoutput::writeGlvS (const Vector& scl, const char* fieldName,
int iStep, int& nBlock, int idBlock) const
{
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
if (scl.empty())
return true;
else if (!myVtf)
@@ -496,6 +519,9 @@ bool SIMoutput::writeGlvS (const Vector& psol, int iStep, int& nBlock,
double time, const char* pvecName,
int idBlock, int psolComps)
{
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
idBlock = this->writeGlvS1(psol,iStep,nBlock,time,
pvecName,idBlock,psolComps);
if (idBlock < 0)
@@ -526,6 +552,9 @@ int SIMoutput::writeGlvS1 (const Vector& psol, int iStep, int& nBlock,
double time, const char* pvecName,
int idBlock, int psolComps, bool scalarOnly)
{
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
if (psol.empty())
return 0;
else if (!myVtf)
@@ -680,6 +709,9 @@ int SIMoutput::writeGlvS1 (const Vector& psol, int iStep, int& nBlock,
bool SIMoutput::writeGlvS2 (const Vector& psol, int iStep, int& nBlock,
double time, int idBlock, int psolComps)
{
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
if (psol.empty())
return true; // No primary solution
else if (!myVtf || !myProblem)
@@ -860,6 +892,9 @@ bool SIMoutput::writeGlvP (const Vector& ssol, int iStep, int& nBlock,
int idBlock, const char* prefix,
std::vector<PointValue>* maxVal)
{
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
if (ssol.empty())
return true;
else if (!myVtf)
@@ -984,6 +1019,9 @@ bool SIMoutput::evalProjSolution (const Vector& ssol,
bool SIMoutput::writeGlvF (const RealFunc& f, const char* fname,
int iStep, int& nBlock, int idBlock, double time)
{
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
if (!myVtf) return false;
IntVec sID;
@@ -1009,6 +1047,9 @@ bool SIMoutput::writeGlvF (const RealFunc& f, const char* fname,
bool SIMoutput::writeGlvStep (int iStep, double value, int itype)
{
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
myVtf->writeGeometryBlocks(iStep);
if (itype == 0)
@@ -1020,6 +1061,9 @@ bool SIMoutput::writeGlvStep (int iStep, double value, int itype)
bool SIMoutput::writeGlvM (const Mode& mode, bool freq, int& nBlock)
{
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
if (mode.eigVec.empty())
return true;
else if (!myVtf)
@@ -1067,6 +1111,9 @@ bool SIMoutput::writeGlvM (const Mode& mode, bool freq, int& nBlock)
bool SIMoutput::writeGlvN (const Matrix& norms, int iStep, int& nBlock,
const std::vector<std::string>& prefix, int idBlock)
{
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
if (norms.empty())
return true;
else if (!myVtf)
@@ -1139,6 +1186,9 @@ bool SIMoutput::writeGlvN (const Matrix& norms, int iStep, int& nBlock,
bool SIMoutput::writeGlvE (const Vector& vec, int iStep, int& nBlock,
const char* name)
{
if (adm.dd.isPartitioned() && adm.getProcId() != 0)
return true;
if (!myVtf)
return false;

View File

@@ -309,6 +309,8 @@ void HDF5Writer::writeSIM (int level, const DataEntry& entry,
size_t projOfs = 0;
for (int i = 0; i < sim->getNoPatches(); ++i) {
int loc = sim->getLocalPatchIndex(i+1);
if (sim->getProcessAdm().dd.isPartitioned() && sim->getProcessAdm().getProcId() != 0)
loc = 0;
if (loc > 0 && (!(results & DataExporter::REDUNDANT) ||
sim->getGlobalProcessID() == 0)) // we own the patch
{