Changed: Moved generation of thread groups to SIMbase::preprocess. For volume faces, only generate thread groups for those subjected to boundary integrals (i.e., the Neumann boundaries).

git-svn-id: http://svn.sintef.no/trondheim/IFEM/trunk@1431 e10b68d5-8a6e-419e-a041-bce267b0401d
This commit is contained in:
kmo 2012-01-24 12:33:36 +00:00 committed by Knut Morten Okstad
parent 3b65885640
commit 8a33cad300
16 changed files with 376 additions and 250 deletions

View File

@ -252,6 +252,11 @@ public:
//! \brief Computes the number of boundary integration points in this patch.
virtual void getNoBouPoints(size_t& nPt, char ldim, char lindx) = 0;
//! \brief Generates element groups for multi-threading of interior integrals.
virtual void generateThreadGroups() {}
//! \brief Generates element groups for multi-threading of boundary integrals.
virtual void generateThreadGroups(char) {}
// Methods for integration of finite element quantities.
// These are the main computational methods of the ASM class hierarchy.

View File

@ -1008,9 +1008,6 @@ bool ASMs2D::integrate (Integrand& integrand,
const int n1 = surf->numCoefs_u();
const int nel1 = n1 - p1 + 1;
if (threadGroups.empty())
generateThreadGroups();
// === Assembly loop over all elements in the patch ==========================
@ -1046,10 +1043,17 @@ bool ASMs2D::integrate (Integrand& integrand,
break;
}
// Compute characteristic element length, if needed
if (integrand.getIntegrandType() == 2)
{
// Compute characteristic element length
fe.h = getElmSize(p1,p2,Xnod);
// Element size in parametric space
int inod = MNPC[iel-1].back();
dXidu[0] = surf->knotSpan(0,nodeInd[inod].I);
dXidu[1] = surf->knotSpan(1,nodeInd[inod].J);
}
else if (integrand.getIntegrandType() == 3)
{
// --- Compute average value of basis functions over the element -----
@ -1088,11 +1092,6 @@ bool ASMs2D::integrate (Integrand& integrand,
X[i] = X0[i];
}
// TODO: Do this conditionally (only when the CFD solver requires it)
int inod = MNPC[iel-1].back();
dXidu[0] = surf->knotSpan(0,nodeInd[inod].I);
dXidu[1] = surf->knotSpan(1,nodeInd[inod].J);
// Initialize element quantities
LocalIntegral* A = integrand.getLocalIntegral(fe.N.size(),fe.iel);
if (!integrand.initElement(MNPC[iel-1],X,nRed*nRed,*A))
@ -1167,14 +1166,15 @@ bool ASMs2D::integrate (Integrand& integrand,
// Compute Hessian of coordinate mapping and 2nd order derivatives
if (integrand.getIntegrandType() == 2)
{
if (!utl::Hessian(Hess,fe.d2NdX2,Jac,Xnod,d2Ndu2,dNdu))
{
ok = false;
break;
}
// RUNAR: Do this conditionally (when the CFD solver requires it)
utl::getGmat(Jac,dXidu,fe.G);
utl::getGmat(Jac,dXidu,fe.G);
}
#if SP_DEBUG > 4
std::cout <<"\niel, ip = "<< iel <<" "<< ip
@ -1716,23 +1716,26 @@ bool ASMs2D::evalSolution (Matrix& sField, const Integrand& integrand,
}
void ASMs2D::generateThreadGroups()
void ASMs2D::generateThreadGroups ()
{
const int p1 = surf->order_u();
const int p2 = surf->order_v();
const int n1 = surf->numCoefs_u();
const int n2 = surf->numCoefs_v();
const int nel1 = n1 - p1 + 1;
const int nel2 = n2 - p2 + 1;
utl::calcThreadGroups(nel1,nel2,threadGroups);
if (threadGroups.size() < 2) return;
if (threadGroups.size() > 1)
for (size_t i = 0; i < threadGroups.size(); i++) {
std::cout <<" Thread group "<< i+1;
for (size_t j = 0; j < threadGroups[i].size(); j++)
std::cout <<"\n\t thread "<< j+1
<< ": "<< threadGroups[i][j].size() <<" elements";
std::cout << std::endl;
}
std::cout <<"\nMultiple threads are utilized during element assembly.";
for (size_t i = 0; i < threadGroups.size(); i++)
{
std::cout <<"\n Thread group "<< i+1;
for (size_t j = 0; j < threadGroups[i].size(); j++)
std::cout <<"\n\tthread "<< j+1
<< ": "<< threadGroups[i][j].size() <<" elements";
}
std::cout << std::endl;
}

View File

@ -101,7 +101,7 @@ public:
//! This is used to reinitialize the patch after it has been refined.
virtual void clear(bool retainGeometry = false);
//! \brief Returns a matrix with nodal coordinates for an element.
//! \brief Returns a matrix with nodal coordinates for an element.
//! \param[in] iel Element index
//! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes
//! in one element
@ -346,6 +346,9 @@ protected:
//! \param[in] basis Which basis to return size parameters for (mixed methods)
virtual bool getSize(int& n1, int& n2, int basis = 0) const;
//! \brief Generates element groups for multi-threading of interior integrals.
virtual void generateThreadGroups();
//! \brief Establishes matrices with basis functions and 1st derivatives.
static void extractBasis(const Go::BasisDerivsSf& spline,
Vector& N, Matrix& dNdu);
@ -377,11 +380,8 @@ protected:
const IndexVec& nodeInd; //!< IJ-pairs for the control points (nodes)
IndexVec myNodeInd; //!< The actual IJ-pair container
//! Element groups for multithreaded assembly
//! Element groups for multi-threaded assembly
utl::ThreadGroups threadGroups;
//! \brief Generate thread groups
virtual void generateThreadGroups();
};
#endif

View File

@ -187,6 +187,27 @@ bool ASMs2DLag::getSize (int& n1, int& n2, int) const
}
size_t ASMs2DLag::getNoBoundaryElms (char lIndex, char ldim) const
{
if (!surf) return 0;
if (ldim < 1 && lIndex > 0)
return 1;
switch (lIndex)
{
case 1:
case 2:
return (ny-1)/(surf->order_v()-1);
case 3:
case 4:
return (nx-1)/(surf->order_u()-1);
}
return 0;
}
bool ASMs2DLag::integrate (Integrand& integrand,
GlobalIntegral& glInt,
const TimeDomain& time)
@ -218,9 +239,6 @@ bool ASMs2DLag::integrate (Integrand& integrand,
const int p1 = surf->order_u();
const int p2 = surf->order_v();
if (threadGroups.empty())
generateThreadGroups();
// === Assembly loop over all elements in the patch ==========================
@ -631,18 +649,13 @@ bool ASMs2DLag::evalSolution (Matrix& sField, const Integrand& integrand,
}
void ASMs2DLag::generateThreadGroups()
void ASMs2DLag::generateThreadGroups ()
{
const int p1 = surf->order_u();
const int p2 = surf->order_v();
// Evaluate the parametric values
RealArray gpar[2];
getGridParameters(gpar[0],0,p1-1);
getGridParameters(gpar[1],1,p2-1);
const int nel1 = (gpar[0].size()-1)/(p1-1);
const int nel2 = (gpar[1].size()-1)/(p2-1);
const int nel1 = (nx-1)/(p1-1);
const int nel2 = (ny-1)/(p2-1);
utl::calcThreadGroups(nel1,nel2,threadGroups);
}

View File

@ -48,9 +48,9 @@ public:
virtual void clear(bool retainGeometry = false);
//! \brief Returns a matrix with nodal coordinates for an element.
//! \param[in] iel Element index
//! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes
//! in one element
//! \param[in] iel Element index
virtual bool getElementCoordinates(Matrix& X, int iel) const;
//! \brief Returns a matrix with all nodal coordinates within the patch.
@ -137,15 +137,17 @@ public:
virtual bool evalSolution(Matrix& sField, const Integrand& integrand,
const RealArray* gpar, bool regular = true) const;
protected:
//! \brief Generate thread groups
void generateThreadGroups();
public:
//! \brief Returns the number of nodal points in each parameter direction.
//! \param[out] n1 Number of nodes in first (u) direction
//! \param[out] n2 Number of nodes in second (v) direction
virtual bool getSize(int& n1, int& n2, int = 0) const;
//! \brief Generates element groups for multi-threading of interior integrals.
virtual void generateThreadGroups();
//! \brief Returns the number of elements on a boundary.
virtual size_t getNoBoundaryElms(char lIndex, char ldim) const;
private:
size_t nx; //!< Number of nodes in first parameter direction
size_t ny; //!< Number of nodes in second parameter direction

View File

@ -96,9 +96,6 @@ bool ASMs2DSpec::integrate (Integrand& integrand,
if (!Legendre::basisDerivatives(p1,D1)) return false;
if (!Legendre::basisDerivatives(p2,D2)) return false;
if (threadGroups.empty())
generateThreadGroups();
// === Assembly loop over all elements in the patch ==========================
@ -113,7 +110,8 @@ bool ASMs2DSpec::integrate (Integrand& integrand,
int iel = threadGroups[g][t][e]+1;
// Set up control point coordinates for current element
if (!this->getElementCoordinates(Xnod,iel)) {
if (!this->getElementCoordinates(Xnod,iel))
{
ok = false;
break;
}
@ -121,11 +119,13 @@ bool ASMs2DSpec::integrate (Integrand& integrand,
// Initialize element quantities
fe.iel = MLGE[iel-1];
LocalIntegral* A = integrand.getLocalIntegral(fe.N.size(),fe.iel);
if (!integrand.initElement(MNPC[iel-1],*A)) {
if (!integrand.initElement(MNPC[iel-1],*A))
{
ok = false;
break;
}
// --- Integration loop over all Gauss points in each direction --------
int count = 1;
@ -147,14 +147,16 @@ bool ASMs2DSpec::integrate (Integrand& integrand,
// Evaluate the integrand and accumulate element contributions
fe.detJxW *= wg1(i)*wg2(j);
if (!integrand.evalInt(*A,fe,time,X)) {
if (!integrand.evalInt(*A,fe,time,X))
{
ok = false;
break;
}
}
// Assembly of global system integral
if (!glInt.assemble(A->ref(),fe.iel)) {
if (!glInt.assemble(A->ref(),fe.iel))
{
ok = false;
break;
}
@ -235,6 +237,7 @@ bool ASMs2DSpec::integrate (Integrand& integrand, int lIndex,
LocalIntegral* A = integrand.getLocalIntegral(nen,fe.iel,true);
if (!integrand.initElementBou(MNPC[iel-1],*A)) return false;
// --- Integration loop over all Gauss points along the edge -------------
for (int i = 0; i < p[t2-1]; i++)

View File

@ -506,9 +506,6 @@ bool ASMs2Dmx::integrate (Integrand& integrand,
const int n1 = surf->numCoefs_u();
const int nel1 = n1 - p1 + 1;
if (threadGroups.empty())
generateThreadGroups();
// === Assembly loop over all elements in the patch ==========================

View File

@ -231,9 +231,6 @@ bool ASMs2DmxLag::integrate (Integrand& integrand,
const int q1 = p1 - 1;
const int q2 = p2 - 1;
if (threadGroups.empty())
generateThreadGroups();
// === Assembly loop over all elements in the patch ==========================

View File

@ -1025,6 +1025,8 @@ bool ASMs3D::getSize (int& n1, int& n2, int& n3, int) const
size_t ASMs3D::getNoBoundaryElms (char lIndex, char ldim) const
{
if (!svol) return 0;
if (ldim < 1 && lIndex > 0)
return 1;
else if (ldim < 2 && lIndex > 0 && lIndex <= 12)
@ -1222,9 +1224,6 @@ bool ASMs3D::integrate (Integrand& integrand,
const int nel1 = n1 - p1 + 1;
const int nel2 = n2 - p2 + 1;
if (threadGroupsVol.empty())
generateThreadGroups();
// === Assembly loop over all elements in the patch ==========================
@ -1261,10 +1260,18 @@ bool ASMs3D::integrate (Integrand& integrand,
break;
}
// Compute characteristic element length, if needed
if (integrand.getIntegrandType() == 2)
{
// Compute characteristic element length
fe.h = getElmSize(p1,p2,p3,Xnod);
// Element size in parametric space
int inod = MNPC[iel-1].back();
dXidu[0] = svol->knotSpan(0,nodeInd[inod].I);
dXidu[1] = svol->knotSpan(1,nodeInd[inod].J);
dXidu[2] = svol->knotSpan(2,nodeInd[inod].K);
}
else if (integrand.getIntegrandType() == 3)
{
// --- Compute average value of basis functions over the element -----
@ -1306,13 +1313,7 @@ bool ASMs3D::integrate (Integrand& integrand,
X.z = X0[2];
}
// TODO: Do this conditionally (only when the CFD solver requires it)
int inod = MNPC[iel-1].back();
dXidu[0] = svol->knotSpan(0,nodeInd[inod].I);
dXidu[1] = svol->knotSpan(1,nodeInd[inod].J);
dXidu[2] = svol->knotSpan(2,nodeInd[inod].K);
// Initialize element quantities
// Initialize element quantities
LocalIntegral* A = integrand.getLocalIntegral(fe.N.size(),fe.iel);
if (!integrand.initElement(MNPC[iel-1],X,nRed*nRed*nRed,*A))
{
@ -1391,14 +1392,15 @@ bool ASMs3D::integrate (Integrand& integrand,
// Compute Hessian of coordinate mapping and 2nd order derivatives
if (integrand.getIntegrandType() == 2)
{
if (!utl::Hessian(Hess,fe.d2NdX2,Jac,Xnod,d2Ndu2,dNdu))
{
ok = false;
break;
}
// RUNAR: Do this conditionally (when the CFD solver requires it)
utl::getGmat(Jac,dXidu,fe.G);
utl::getGmat(Jac,dXidu,fe.G);
}
#if SP_DEBUG > 4
std::cout <<"\niel, ip = "<< iel <<" "<< ip
@ -1418,16 +1420,16 @@ bool ASMs3D::integrate (Integrand& integrand,
}
}
// Finalize the element quantities
if (!integrand.finalizeElement(*A,time,firstIp+jp))
{
// Finalize the element quantities
if (!integrand.finalizeElement(*A,time,firstIp+jp))
{
ok = false;
break;
}
// Assembly of global system integral
if (!glInt.assemble(A->ref(),fe.iel))
{
{
ok = false;
break;
}
@ -1449,6 +1451,15 @@ bool ASMs3D::integrate (Integrand& integrand, int lIndex,
PROFILE2("ASMs3D::integrate(B)");
std::map<char,utl::ThreadGroups>::const_iterator tit;
if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end())
{
std::cerr <<" *** ASMs3D::integrate: No thread groups for face "<< lIndex
<< std::endl;
return false;
}
const utl::ThreadGroups& threadGrp = tit->second;
// Get Gaussian quadrature points and weights
const double* xg = GaussQuadrature::getCoord(nGauss);
const double* wg = GaussQuadrature::getWeight(nGauss);
@ -1493,16 +1504,13 @@ bool ASMs3D::integrate (Integrand& integrand, int lIndex,
const int nel1 = n1 - p1 + 1;
const int nel2 = n2 - p2 + 1;
if (threadGroupsFace.empty())
generateThreadGroups();
// === Assembly loop over all elements on the patch face =====================
bool ok=true;
for (size_t g=0;g<threadGroupsFace[lIndex-1].size() && ok;++g) {
bool ok = true;
for (size_t g = 0; g < threadGrp.size() && ok; ++g) {
#pragma omp parallel for schedule(static)
for (size_t t=0;t<threadGroupsFace[lIndex-1][g].size();++t) {
for (size_t t = 0; t < threadGrp[g].size(); ++t) {
FiniteElement fe(p1*p2*p3);
fe.xi = fe.eta = fe.zeta = faceDir < 0 ? -1.0 : 1.0;
fe.u = gpar[0](1,1);
@ -1512,8 +1520,8 @@ bool ASMs3D::integrate (Integrand& integrand, int lIndex,
Matrix dNdu, Xnod, Jac;
Vec4 X;
Vec3 normal;
for (size_t l=0;l<threadGroupsFace[lIndex-1][g][t].size();++l) {
int iel = threadGroupsFace[lIndex-1][g][t][l];
for (size_t l = 0; l < threadGrp[g][t].size(); ++l) {
int iel = threadGrp[g][t][l];
fe.iel = MLGE[iel];
if (fe.iel < 1) continue; // zero-volume element
@ -2170,60 +2178,81 @@ bool ASMs3D::evalSolution (Matrix& sField, const Integrand& integrand,
}
void ASMs3D::generateThreadGroups()
void ASMs3D::generateThreadGroups ()
{
const int nel1 = svol->numCoefs(0) - svol->order(0) + 1;
const int nel2 = svol->numCoefs(1) - svol->order(1) + 1;
const int nel3 = svol->numCoefs(2) - svol->order(2) + 1;
utl::calcThreadGroups(nel1,nel2,nel3,threadGroupsVol);
if (threadGroupsVol.size() < 2) return;
std::cout <<"\nMultiple threads are utilized during element assembly.";
for (size_t i = 0; i < threadGroupsVol.size(); i++)
{
std::cout <<"\n Thread group "<< i+1;
for (size_t j = 0; j < threadGroupsVol[i].size(); j++)
std::cout <<"\n\tthread "<< j+1
<< ": "<< threadGroupsVol[i][j].size() <<" elements";
}
std::cout << std::endl;
}
void ASMs3D::generateThreadGroups (char lIndex)
{
if (threadGroupsFace.find(lIndex) != threadGroupsFace.end()) return;
const int p1 = svol->order(0);
const int p2 = svol->order(1);
const int p3 = svol->order(2);
const int n1 = svol->numCoefs(0);
const int n2 = svol->numCoefs(1);
const int n3 = svol->numCoefs(2);
const int nel1 = n1 - p1 + 1;
const int nel2 = n2 - p2 + 1;
const int nel3 = n3 - p3 + 1;
utl::calcThreadGroups(nel1,nel2,nel3,threadGroupsVol);
// now the faces
threadGroupsFace.resize(6);
for (int face=0;face<6;++face) {
const int faceDir = (face+2)/((face+1)%2 ? -2 : 2);
std::vector<int> map;
int iel=0;
for (int i3 = p3; i3 <= n3; i3++) {
for (int i2 = p2; i2 <= n2; i2++) {
for (int i1 = p1; i1 <= n1; i1++, iel++) {
// Skip elements that are not on current boundary face
bool skipMe = false;
switch (faceDir)
// Find elements that are on the boundary face 'lIndex'
IntVec map; map.reserve(this->getNoBoundaryElms(lIndex,2));
int d1, d2, iel = 0;
for (int i3 = p3; i3 <= n3; i3++)
for (int i2 = p2; i2 <= n2; i2++)
for (int i1 = p1; i1 <= n1; i1++, iel++)
switch (lIndex)
{
case -1: if (i1 > p1) skipMe = true; break;
case 1: if (i1 < n1) skipMe = true; break;
case -2: if (i2 > p2) skipMe = true; break;
case 2: if (i2 < n2) skipMe = true; break;
case -3: if (i3 > p3) skipMe = true; break;
case 3: if (i3 < n3) skipMe = true; break;
case 1: if (i1 == p1) map.push_back(iel); break;
case 2: if (i1 == n1) map.push_back(iel); break;
case 3: if (i2 == p2) map.push_back(iel); break;
case 4: if (i2 == n2) map.push_back(iel); break;
case 5: if (i3 == p3) map.push_back(iel); break;
case 6: if (i3 == n3) map.push_back(iel); break;
}
if (skipMe)
continue;
map.push_back(iel);
}
}
switch (lIndex)
{
case 1:
case 2:
d1 = n2 - p2 + 1;
d2 = n3 - p3 + 1;
break;
case 3:
case 4:
d1 = n1 - p1 + 1;
d2 = n3 - p3 + 1;
break;
default:
d1 = n1 - p1 + 1;
d2 = n2 - p2 + 1;
}
int d1, d2;
if (face < 2) {
d1 = nel2;
d2 = nel3;
} else if (face < 4) {
d1 = nel1;
d2 = nel3;
} else {
d1 = nel1;
d2 = nel2;
utl::ThreadGroups& fGrp = threadGroupsFace[lIndex];
utl::calcThreadGroups(d1,d2,fGrp);
utl::mapThreadGroups(fGrp,map);
if (fGrp.size() > 1)
for (size_t i = 0; i < threadGroupsVol.size(); i++)
{
std::cout <<"\n Thread group "<< i+1 <<" for boundary face "<<(int)lIndex;
for (size_t j = 0; j < fGrp.size(); j++)
std::cout <<"\n\tthread "<< j+1
<< ": "<< fGrp[i][j].size() <<" elements";
}
utl::calcThreadGroups(d1,d2,threadGroupsFace[face]);
utl::mapThreadGroups(threadGroupsFace[face],map);
}
}

View File

@ -337,6 +337,7 @@ public:
//! \param[in] integrand Object with problem-specific data and methods
Go::SplineVolume* projectSolution(const Integrand& integrand) const;
//! \brief Projects the secondary solution field onto the primary basis.
//! \param[in] integrand Object with problem-specific data and methods
virtual Go::GeomObject* evalSolution(const Integrand& integrand) const;
//! \brief Evaluates the secondary solution field at the given points.
@ -352,8 +353,8 @@ public:
//! \a gpar[0].size() \a X \a gpar[1].size() \a X \a gpar[2].size().
//! Otherwise, we assume that it contains the \a u, \a v and \a w parameters
//! directly for each sampling point.
bool evalSolution(Matrix& sField, const Integrand& integrand,
const RealArray* gpar, bool regular = true) const;
virtual bool evalSolution(Matrix& sField, const Integrand& integrand,
const RealArray* gpar, bool regular = true) const;
//! \brief Calculates parameter values for visualization nodal points.
//! \param[out] prm Parameter values in given direction for all points
@ -405,6 +406,12 @@ protected:
//! \param[in] basis Which basis to return size parameters for (mixed methods)
virtual bool getSize(int& n1, int& n2, int& n3, int basis = 0) const;
//! \brief Generates element groups for multi-threading of interior integrals.
virtual void generateThreadGroups();
//! \brief Generates element groups for multi-threading of boundary integrals.
//! \param[in] lIndex Local index [1,6] of the boundary face
virtual void generateThreadGroups(char lIndex);
//! \brief Establishes matrices with basis functions and 1st derivatives.
static void extractBasis(const Go::BasisDerivs& spline,
Vector& N, Matrix& dNdu);
@ -438,13 +445,10 @@ protected:
const IndexVec& nodeInd; //!< IJK-triplets for the control points (nodes)
IndexVec myNodeInd; //!< The actual IJK-triplet container
//! Element groups for multithreaded volume assembly
//! Element groups for multi-threaded volume assembly
utl::ThreadGroups threadGroupsVol;
//! Element groups for multithreaded face assembly
std::vector<utl::ThreadGroups> threadGroupsFace;
//! \brief Generate thread groups
virtual void generateThreadGroups();
//! Element groups for multi-threaded face assembly
std::map<char,utl::ThreadGroups> threadGroupsFace;
};
#endif

View File

@ -201,6 +201,38 @@ bool ASMs3DLag::getSize (int& n1, int& n2, int& n3, int) const
}
size_t ASMs3DLag::getNoBoundaryElms (char lIndex, char ldim) const
{
if (!svol) return 0;
if (ldim < 1 && lIndex > 0)
return 1;
int nel[3]; // Number of elements in each direction
nel[0] = (nx-1)/(svol->order(0)-1);
nel[1] = (ny-1)/(svol->order(1)-1);
nel[2] = (nz-1)/(svol->order(2)-1);
if (ldim < 2 && lIndex > 0 && lIndex <= 12)
return nel[(lIndex-1)/4];
switch (lIndex)
{
case 1:
case 2:
return nel[1]*nel[2];
case 3:
case 4:
return nel[0]*nel[2];
case 5:
case 6:
return nel[0]*nel[1];
}
return 0;
}
bool ASMs3DLag::integrate (Integrand& integrand,
GlobalIntegral& glInt,
const TimeDomain& time)
@ -235,9 +267,6 @@ bool ASMs3DLag::integrate (Integrand& integrand,
const int p2 = svol->order(1);
const int p3 = svol->order(2);
if (threadGroupsVol.empty())
generateThreadGroups();
// === Assembly loop over all elements in the patch ==========================
@ -272,7 +301,7 @@ bool ASMs3DLag::integrate (Integrand& integrand,
X *= 1.0/(double)Xnod.cols();
}
// Initialize element quantities
// Initialize element quantities
fe.iel = MLGE[iel-1];
LocalIntegral* A = integrand.getLocalIntegral(fe.N.size(),fe.iel);
if (!integrand.initElement(MNPC[iel-1],X,nRed*nRed*nRed,*A))
@ -367,8 +396,8 @@ bool ASMs3DLag::integrate (Integrand& integrand,
}
}
// Finalize the element quantities
if (!integrand.finalizeElement(*A,time,firstIp+jp))
// Finalize the element quantities
if (!integrand.finalizeElement(*A,time,firstIp+jp))
{
ok = false;
break;
@ -396,6 +425,15 @@ bool ASMs3DLag::integrate (Integrand& integrand, int lIndex,
{
if (!svol) return true; // silently ignore empty patches
std::map<char,utl::ThreadGroups>::const_iterator tit;
if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end())
{
std::cerr <<" *** ASMs3DLag::integrate: No thread groups for face "<< lIndex
<< std::endl;
return false;
}
const utl::ThreadGroups& threadGrp = tit->second;
// Get Gaussian quadrature points and weights
const double* xg = GaussQuadrature::getCoord(nGauss);
const double* wg = GaussQuadrature::getWeight(nGauss);
@ -430,16 +468,13 @@ bool ASMs3DLag::integrate (Integrand& integrand, int lIndex,
if (vpar.empty()) this->getGridParameters(vpar,1,1);
if (wpar.empty()) this->getGridParameters(wpar,2,1);
if (threadGroupsFace.empty())
generateThreadGroups();
// === Assembly loop over all elements on the patch face =====================
bool ok=true;
for (size_t g=0;g<threadGroupsFace[lIndex-1].size() && ok;++g) {
bool ok = true;
for (size_t g = 0; g < threadGrp.size() && ok; ++g) {
#pragma omp parallel for schedule(static)
for (size_t t=0;t<threadGroupsFace[lIndex-1][g].size();++t) {
for (size_t t = 0; t < threadGrp[g].size(); ++t) {
FiniteElement fe(p1*p2*p3);
fe.u = upar.front();
fe.v = vpar.front();
@ -449,8 +484,8 @@ bool ASMs3DLag::integrate (Integrand& integrand, int lIndex,
Vec4 X;
Vec3 normal;
double xi[3];
for (size_t l=0;l<threadGroupsFace[lIndex-1][g][t].size();++l) {
int iel = threadGroupsFace[lIndex-1][g][t][l];
for (size_t l = 0; l < threadGrp[g][t].size(); ++l) {
int iel = threadGrp[g][t][l];
int i1 = iel % nel1;
int i2 = (iel / nel1) % nel2;
int i3 = iel / (nel1*nel2);
@ -542,8 +577,8 @@ bool ASMs3DLag::integrate (Integrand& integrand, int lIndex,
}
}
// Assembly of global system integral
if (!glInt.assemble(A->ref(),fe.iel))
// Assembly of global system integral
if (!glInt.assemble(A->ref(),fe.iel))
{
ok = false;
break;
@ -840,63 +875,64 @@ bool ASMs3DLag::evalSolution (Matrix& sField, const Integrand& integrand,
}
void ASMs3DLag::generateThreadGroups()
void ASMs3DLag::generateThreadGroups ()
{
const int p1 = svol->order(0);
const int p2 = svol->order(1);
const int p3 = svol->order(2);
// Evaluate the parametric values
RealArray gpar[3];
getGridParameters(gpar[0],0,p1-1);
getGridParameters(gpar[1],1,p2-1);
getGridParameters(gpar[2],2,p3-1);
const int nel1 = (gpar[0].size()-1)/(p1-1);
const int nel2 = (gpar[1].size()-1)/(p2-1);
const int nel3 = (gpar[2].size()-1)/(p3-1);
const int nel1 = (nx-1)/(p1-1);
const int nel2 = (ny-1)/(p2-1);
const int nel3 = (nz-1)/(p3-1);
utl::calcThreadGroups(nel1,nel2,nel3,threadGroupsVol);
// now the faces
threadGroupsFace.resize(6);
for (int face=0;face<6;++face) {
const int faceDir = (face+2)/((face+1)%2 ? -2 : 2);
std::vector<int> map;
int iel=0;
for (int i3 = 0; i3 < nel3; i3++) {
for (int i2 = 0; i2 < nel2; i2++) {
for (int i1 = 0; i1 < nel1; i1++, iel++) {
// Skip elements that are not on current boundary face
bool skipMe = false;
switch (faceDir)
{
case -1: if (i1 > 0) skipMe = true; break;
case 1: if (i1 < nel1-1) skipMe = true; break;
case -2: if (i2 > 0) skipMe = true; break;
case 2: if (i2 < nel2-1) skipMe = true; break;
case -3: if (i3 > 0) skipMe = true; break;
case 3: if (i3 < nel3-1) skipMe = true; break;
}
if (skipMe)
continue;
map.push_back(iel);
}
}
}
int d1, d2;
if (face < 2) {
d1 = nel2;
d2 = nel3;
} else if (face < 4) {
d1 = nel1;
d2 = nel3;
} else {
d1 = nel1;
d2 = nel2;
}
utl::calcThreadGroups(d1,d2,threadGroupsFace[face]);
utl::mapThreadGroups(threadGroupsFace[face],map);
}
}
void ASMs3DLag::generateThreadGroups (char lIndex)
{
if (threadGroupsFace.find(lIndex) != threadGroupsFace.end()) return;
const int p1 = svol->order(0);
const int p2 = svol->order(1);
const int p3 = svol->order(2);
const int n1 = (nx-1)/(p1-1);
const int n2 = (ny-1)/(p2-1);
const int n3 = (nz-1)/(p3-1);
// Find elements that are on the boundary face 'lIndex'
IntVec map; map.reserve(this->getNoBoundaryElms(lIndex,2));
int d1, d2, iel = 0;
for (int i3 = 1; i3 <= n3; i3++)
for (int i2 = 1; i2 <= n2; i2++)
for (int i1 = 1; i1 <= n1; i1++, iel++)
switch (lIndex)
{
case 1: if (i1 == 1) map.push_back(iel); break;
case 2: if (i1 == n1) map.push_back(iel); break;
case 3: if (i2 == 1) map.push_back(iel); break;
case 4: if (i2 == n2) map.push_back(iel); break;
case 5: if (i3 == 1) map.push_back(iel); break;
case 6: if (i3 == n3) map.push_back(iel); break;
}
switch (lIndex)
{
case 1:
case 2:
d1 = n2;
d2 = n3;
break;
case 3:
case 4:
d1 = n1;
d2 = n3;
break;
default:
d1 = n1;
d2 = n2;
}
utl::calcThreadGroups(d1,d2,threadGroupsFace[lIndex]);
utl::mapThreadGroups(threadGroupsFace[lIndex],map);
}

View File

@ -47,6 +47,17 @@ public:
//! This is used to reinitialize the patch after it has been refined.
virtual void clear(bool retainGeometry = false);
//! \brief Returns a matrix with nodal coordinates for an element.
//! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes
//! in one element
//! \param[in] iel Element index
virtual bool getElementCoordinates(Matrix& X, int iel) const;
//! \brief Returns a matrix with all nodal coordinates within the patch.
//! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes
//! in the patch
virtual void getNodalCoordinates(Matrix& X) const;
//! \brief Returns the global coordinates for the given node.
//! \param[in] inod 1-based node index local to current patch
virtual Vec3 getCoord(size_t inod) const;
@ -134,26 +145,21 @@ public:
virtual bool evalSolution(Matrix& sField, const Integrand& integrand,
const RealArray* gpar, bool regular = true) const;
//! \brief Returns a matrix with nodal coordinates for an element.
//! \param[in] iel Element index
//! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes
//! in one element
virtual bool getElementCoordinates(Matrix& X, int iel) const;
//! \brief Generate thread groups
void generateThreadGroups();
public:
//! \brief Returns a matrix with all nodal coordinates within the patch.
//! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes
//! in the patch
virtual void getNodalCoordinates(Matrix& X) const;
//! \brief Returns the number of nodal points in each parameter direction.
//! \param[out] n1 Number of nodes in first (u) direction
//! \param[out] n2 Number of nodes in second (v) direction
//! \param[out] n3 Number of nodes in third (w) direction
virtual bool getSize(int& n1, int& n2, int& n3, int = 0) const;
//! \brief Generates element groups for multi-threading of interior integrals.
virtual void generateThreadGroups();
//! \brief Generates element groups for multi-threading of boundary integrals.
//! \param[in] lIndex Local index [1,6] of the boundary face
virtual void generateThreadGroups(char lIndex);
//! \brief Returns the number of elements on a boundary.
virtual size_t getNoBoundaryElms(char lIndex, char ldim) const;
private:
size_t nx; //!< Number of nodes in first parameter direction
size_t ny; //!< Number of nodes in second parameter direction

View File

@ -101,9 +101,6 @@ bool ASMs3DSpec::integrate (Integrand& integrand,
if (!Legendre::basisDerivatives(p2,D2)) return false;
if (!Legendre::basisDerivatives(p3,D3)) return false;
if (threadGroupsVol.empty())
generateThreadGroups();
// === Assembly loop over all elements in the patch ==========================
@ -133,6 +130,7 @@ bool ASMs3DSpec::integrate (Integrand& integrand,
break;
}
// --- Integration loop over all Gauss points in each direction --------
int count = 1;
@ -185,6 +183,15 @@ bool ASMs3DSpec::integrate (Integrand& integrand, int lIndex,
{
if (!svol) return true; // silently ignore empty patches
std::map<char,utl::ThreadGroups>::const_iterator tit;
if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end())
{
std::cerr <<" *** ASMs3DSpec::integrate: No thread groups for face "<<lIndex
<< std::endl;
return false;
}
const utl::ThreadGroups& threadGrp = tit->second;
// Find the parametric direction of the face normal {-3,-2,-1, 1, 2, 3}
const int faceDir = (lIndex+1)/(lIndex%2 ? -2 : 2);
@ -212,33 +219,38 @@ bool ASMs3DSpec::integrate (Integrand& integrand, int lIndex,
int nen = p[0]*p[1]*p[2];
// === Assembly loop over all elements on the patch face =====================
bool ok=true;
for (size_t g=0;g<threadGroupsFace[lIndex-1].size() && ok;++g) {
bool ok = true;
for (size_t g = 0; g < threadGrp.size() && ok; ++g) {
#pragma omp parallel for schedule(static)
for (size_t t=0;t<threadGroupsFace[lIndex-1][g].size();++t) {
for (size_t t = 0; t < threadGrp[g].size(); ++t) {
FiniteElement fe(nen);
Matrix dNdu(nen,3), Xnod, Jac;
Vec4 X;
Vec3 normal;
int xi[3];
for (size_t l=0;l<threadGroupsFace[lIndex-1][g][t].size();++l) {
int iel = threadGroupsFace[lIndex-1][g][t][l];
for (size_t l = 0; l < threadGrp[g][t].size(); ++l) {
int iel = threadGrp[g][t][l];
// Set up nodal point coordinates for current element
if (!this->getElementCoordinates(Xnod,++iel)) {
if (!this->getElementCoordinates(Xnod,++iel))
{
ok = false;
break;
}
// Initialize element quantities
fe.iel = MLGE[iel-1];
fe.iel = MLGE[iel-1];
LocalIntegral* A = integrand.getLocalIntegral(nen,fe.iel,true);
if (!integrand.initElementBou(MNPC[iel-1],*A)) {
if (!integrand.initElementBou(MNPC[iel-1],*A))
{
ok = false;
break;
}
// --- Integration loop over all Gauss points in each direction --------
for (int j = 0; j < p[t2-1]; j++)
@ -265,14 +277,16 @@ bool ASMs3DSpec::integrate (Integrand& integrand, int lIndex,
// Evaluate the integrand and accumulate element contributions
fe.detJxW *= wg[t1-1][i]*wg[t2-1][j];
if (!integrand.evalBou(*A,fe,time,X,normal)) {
if (!integrand.evalBou(*A,fe,time,X,normal))
{
ok = false;
break;
}
}
// Assembly of global system integral
if (!glInt.assemble(A->ref(),fe.iel)) {
if (!glInt.assemble(A->ref(),fe.iel))
{
ok = false;
break;
}
@ -379,6 +393,7 @@ bool ASMs3DSpec::integrateEdge (Integrand& integrand, int lEdge,
LocalIntegral* A = integrand.getLocalIntegral(nen,fe.iel,true);
if (!integrand.initElementBou(MNPC[iel-1],*A)) return false;
// --- Integration loop over all Gauss points along the edge -----------
for (int i = 0; i < pe; i++)

View File

@ -538,9 +538,6 @@ bool ASMs3Dmx::integrate (Integrand& integrand,
const int nel2 = n2 - p2 + 1;
const int nel3 = n3 - p3 + 1;
if (threadGroupsVol.empty())
generateThreadGroups();
// === Assembly loop over all elements in the patch ==========================
@ -664,6 +661,15 @@ bool ASMs3Dmx::integrate (Integrand& integrand, int lIndex,
PROFILE2("ASMs3Dmx::integrate(B)");
std::map<char,utl::ThreadGroups>::const_iterator tit;
if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end())
{
std::cerr <<" *** ASMs3D::integrate: No thread groups for face "<< lIndex
<< std::endl;
return false;
}
const utl::ThreadGroups& threadGrp = tit->second;
// Get Gaussian quadrature points and weights
const double* xg = GaussQuadrature::getCoord(nGauss);
const double* wg = GaussQuadrature::getWeight(nGauss);
@ -706,16 +712,13 @@ bool ASMs3Dmx::integrate (Integrand& integrand, int lIndex,
const int nel1 = n1 - p1 + 1;
const int nel2 = n2 - p2 + 1;
if (threadGroupsFace.empty())
generateThreadGroups();
// === Assembly loop over all elements on the patch face =====================
bool ok=true;
for (size_t g=0;g<threadGroupsFace[lIndex-1].size() && ok;++g) {
bool ok = true;
for (size_t g = 0; g < threadGrp.size() && ok; ++g) {
#pragma omp parallel for schedule(static)
for (size_t t=0;t<threadGroupsFace[lIndex-1][g].size();++t) {
for (size_t t = 0; t < threadGrp[g].size(); ++t) {
MxFiniteElement fe(basis1->order(0)*basis1->order(1)*basis1->order(2),
basis2->order(0)*basis2->order(1)*basis2->order(2));
fe.xi = fe.eta = fe.zeta = faceDir < 0 ? -1.0 : 1.0;
@ -726,8 +729,8 @@ bool ASMs3Dmx::integrate (Integrand& integrand, int lIndex,
Matrix dN1du, dN2du, Xnod, Jac;
Vec4 X;
Vec3 normal;
for (size_t l=0;l<threadGroupsFace[lIndex-1][g][t].size();++l) {
int iel = threadGroupsFace[lIndex-1][g][t][l];
for (size_t l = 0; l < threadGrp[g][t].size(); ++l) {
int iel = threadGrp[g][t][l];
fe.iel = MLGE[iel];
if (fe.iel < 1) continue; // zero-volume element

View File

@ -256,9 +256,6 @@ bool ASMs3DmxLag::integrate (Integrand& integrand,
const int q2 = p2 - 1;
const int q3 = p3 - 1;
if (threadGroupsVol.empty())
generateThreadGroups();
// === Assembly loop over all elements in the patch ==========================
@ -364,6 +361,15 @@ bool ASMs3DmxLag::integrate (Integrand& integrand, int lIndex,
{
if (!svol) return true; // silently ignore empty patches
std::map<char,utl::ThreadGroups>::const_iterator tit;
if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end())
{
std::cerr <<" *** ASMs3DLag::integrate: No thread groups for face "<< lIndex
<< std::endl;
return false;
}
const utl::ThreadGroups& threadGrp = tit->second;
// Get Gaussian quadrature points and weights
const double* xg = GaussQuadrature::getCoord(nGauss);
const double* wg = GaussQuadrature::getWeight(nGauss);
@ -389,23 +395,20 @@ bool ASMs3DmxLag::integrate (Integrand& integrand, int lIndex,
const int nel1 = (nx2-1)/(p1-1);
const int nel2 = (ny2-1)/(p2-1);
if (threadGroupsFace.empty())
generateThreadGroups();
// === Assembly loop over all elements on the patch face =====================
bool ok=true;
for (size_t g=0;g<threadGroupsFace[lIndex-1].size() && ok;++g) {
bool ok = true;
for (size_t g = 0; g < threadGrp.size() && ok; ++g) {
#pragma omp parallel for schedule(static)
for (size_t t=0;t<threadGroupsFace[lIndex-1][g].size();++t) {
for (size_t t = 0; t < threadGrp[g].size(); ++t) {
MxFiniteElement fe(p1*p2*p3,q1*q2*q3);
Matrix dN1du, dN2du, Xnod, Jac;
Vec4 X;
Vec3 normal;
double xi[3];
for (size_t l=0;l<threadGroupsFace[lIndex-1][g][t].size();++l) {
int iel = threadGroupsFace[lIndex-1][g][t][l];
for (size_t l = 0; l < threadGrp[g][t].size(); ++l) {
int iel = threadGrp[g][t][l];
int i1 = iel % nel1;
int i2 = (iel / nel1) % nel2;
int i3 = iel / (nel1*nel2);

View File

@ -667,18 +667,19 @@ bool SIMbase::preprocess (const std::vector<int>& ignored, bool fixDup)
std::cout <<"\nResolving Dirichlet boundary conditions"<< std::endl;
bool ok = true;
for (PropertyVec::const_iterator p = myProps.begin(); p != myProps.end(); p++)
switch (p->pcode) {
PropertyVec::const_iterator q;
for (q = myProps.begin(); q != myProps.end(); q++)
switch (q->pcode) {
case Property::DIRICHLET:
if (this->addConstraint(p->patch,p->lindx,p->ldim,p->pindx))
if (this->addConstraint(q->patch,q->lindx,q->ldim,q->pindx))
std::cout << std::endl;
else
ok = false;
break;
case Property::DIRICHLET_INHOM:
if (this->addConstraint(p->patch,p->lindx,p->ldim,p->pindx%1000,p->pindx))
if (this->addConstraint(q->patch,q->lindx,q->ldim,q->pindx%1000,q->pindx))
std::cout << std::endl;
else
ok = false;
@ -700,6 +701,15 @@ bool SIMbase::preprocess (const std::vector<int>& ignored, bool fixDup)
// Resolve possibly chaining of the MPC equations
if (!allMPCs.empty()) ASMbase::resolveMPCchains(allMPCs);
// Generate element groups for multi-threading
for (mit = myModel.begin(); mit != myModel.end(); mit++)
(*mit)->generateThreadGroups();
for (q = myProps.begin(); q != myProps.end(); q++)
if (q->pcode == Property::NEUMANN)
if (q->patch > 0 && q->patch <= myModel.size())
if (q->ldim+1 == myModel[q->patch-1]->getNoParamDim())
myModel[q->patch-1]->generateThreadGroups(q->lindx);
// Preprocess the result points
for (ResPointVec::iterator p = myPoints.begin(); p != myPoints.end();)
{