diff --git a/src/ASM/ASMbase.h b/src/ASM/ASMbase.h index b6ad84a3..f329b630 100644 --- a/src/ASM/ASMbase.h +++ b/src/ASM/ASMbase.h @@ -252,6 +252,11 @@ public: //! \brief Computes the number of boundary integration points in this patch. virtual void getNoBouPoints(size_t& nPt, char ldim, char lindx) = 0; + //! \brief Generates element groups for multi-threading of interior integrals. + virtual void generateThreadGroups() {} + //! \brief Generates element groups for multi-threading of boundary integrals. + virtual void generateThreadGroups(char) {} + // Methods for integration of finite element quantities. // These are the main computational methods of the ASM class hierarchy. diff --git a/src/ASM/ASMs2D.C b/src/ASM/ASMs2D.C index a71f652c..4813fee1 100644 --- a/src/ASM/ASMs2D.C +++ b/src/ASM/ASMs2D.C @@ -1008,9 +1008,6 @@ bool ASMs2D::integrate (Integrand& integrand, const int n1 = surf->numCoefs_u(); const int nel1 = n1 - p1 + 1; - if (threadGroups.empty()) - generateThreadGroups(); - // === Assembly loop over all elements in the patch ========================== @@ -1046,10 +1043,17 @@ bool ASMs2D::integrate (Integrand& integrand, break; } - // Compute characteristic element length, if needed if (integrand.getIntegrandType() == 2) + { + // Compute characteristic element length fe.h = getElmSize(p1,p2,Xnod); + // Element size in parametric space + int inod = MNPC[iel-1].back(); + dXidu[0] = surf->knotSpan(0,nodeInd[inod].I); + dXidu[1] = surf->knotSpan(1,nodeInd[inod].J); + } + else if (integrand.getIntegrandType() == 3) { // --- Compute average value of basis functions over the element ----- @@ -1088,11 +1092,6 @@ bool ASMs2D::integrate (Integrand& integrand, X[i] = X0[i]; } - // TODO: Do this conditionally (only when the CFD solver requires it) - int inod = MNPC[iel-1].back(); - dXidu[0] = surf->knotSpan(0,nodeInd[inod].I); - dXidu[1] = surf->knotSpan(1,nodeInd[inod].J); - // Initialize element quantities LocalIntegral* A = integrand.getLocalIntegral(fe.N.size(),fe.iel); if (!integrand.initElement(MNPC[iel-1],X,nRed*nRed,*A)) @@ -1167,14 +1166,15 @@ bool ASMs2D::integrate (Integrand& integrand, // Compute Hessian of coordinate mapping and 2nd order derivatives if (integrand.getIntegrandType() == 2) + { if (!utl::Hessian(Hess,fe.d2NdX2,Jac,Xnod,d2Ndu2,dNdu)) { ok = false; break; } - // RUNAR: Do this conditionally (when the CFD solver requires it) - utl::getGmat(Jac,dXidu,fe.G); + utl::getGmat(Jac,dXidu,fe.G); + } #if SP_DEBUG > 4 std::cout <<"\niel, ip = "<< iel <<" "<< ip @@ -1716,23 +1716,26 @@ bool ASMs2D::evalSolution (Matrix& sField, const Integrand& integrand, } -void ASMs2D::generateThreadGroups() +void ASMs2D::generateThreadGroups () { const int p1 = surf->order_u(); const int p2 = surf->order_v(); const int n1 = surf->numCoefs_u(); const int n2 = surf->numCoefs_v(); + const int nel1 = n1 - p1 + 1; const int nel2 = n2 - p2 + 1; utl::calcThreadGroups(nel1,nel2,threadGroups); + if (threadGroups.size() < 2) return; - if (threadGroups.size() > 1) - for (size_t i = 0; i < threadGroups.size(); i++) { - std::cout <<" Thread group "<< i+1; - for (size_t j = 0; j < threadGroups[i].size(); j++) - std::cout <<"\n\t thread "<< j+1 - << ": "<< threadGroups[i][j].size() <<" elements"; - std::cout << std::endl; - } + std::cout <<"\nMultiple threads are utilized during element assembly."; + for (size_t i = 0; i < threadGroups.size(); i++) + { + std::cout <<"\n Thread group "<< i+1; + for (size_t j = 0; j < threadGroups[i].size(); j++) + std::cout <<"\n\tthread "<< j+1 + << ": "<< threadGroups[i][j].size() <<" elements"; + } + std::cout << std::endl; } diff --git a/src/ASM/ASMs2D.h b/src/ASM/ASMs2D.h index 479417b0..22afad0a 100644 --- a/src/ASM/ASMs2D.h +++ b/src/ASM/ASMs2D.h @@ -101,7 +101,7 @@ public: //! This is used to reinitialize the patch after it has been refined. virtual void clear(bool retainGeometry = false); - //! \brief Returns a matrix with nodal coordinates for an element. + //! \brief Returns a matrix with nodal coordinates for an element. //! \param[in] iel Element index //! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes //! in one element @@ -346,6 +346,9 @@ protected: //! \param[in] basis Which basis to return size parameters for (mixed methods) virtual bool getSize(int& n1, int& n2, int basis = 0) const; + //! \brief Generates element groups for multi-threading of interior integrals. + virtual void generateThreadGroups(); + //! \brief Establishes matrices with basis functions and 1st derivatives. static void extractBasis(const Go::BasisDerivsSf& spline, Vector& N, Matrix& dNdu); @@ -377,11 +380,8 @@ protected: const IndexVec& nodeInd; //!< IJ-pairs for the control points (nodes) IndexVec myNodeInd; //!< The actual IJ-pair container - //! Element groups for multithreaded assembly + //! Element groups for multi-threaded assembly utl::ThreadGroups threadGroups; - - //! \brief Generate thread groups - virtual void generateThreadGroups(); }; #endif diff --git a/src/ASM/ASMs2DLag.C b/src/ASM/ASMs2DLag.C index e0341070..643fd795 100644 --- a/src/ASM/ASMs2DLag.C +++ b/src/ASM/ASMs2DLag.C @@ -187,6 +187,27 @@ bool ASMs2DLag::getSize (int& n1, int& n2, int) const } +size_t ASMs2DLag::getNoBoundaryElms (char lIndex, char ldim) const +{ + if (!surf) return 0; + + if (ldim < 1 && lIndex > 0) + return 1; + + switch (lIndex) + { + case 1: + case 2: + return (ny-1)/(surf->order_v()-1); + case 3: + case 4: + return (nx-1)/(surf->order_u()-1); + } + + return 0; +} + + bool ASMs2DLag::integrate (Integrand& integrand, GlobalIntegral& glInt, const TimeDomain& time) @@ -218,9 +239,6 @@ bool ASMs2DLag::integrate (Integrand& integrand, const int p1 = surf->order_u(); const int p2 = surf->order_v(); - if (threadGroups.empty()) - generateThreadGroups(); - // === Assembly loop over all elements in the patch ========================== @@ -631,18 +649,13 @@ bool ASMs2DLag::evalSolution (Matrix& sField, const Integrand& integrand, } -void ASMs2DLag::generateThreadGroups() +void ASMs2DLag::generateThreadGroups () { const int p1 = surf->order_u(); const int p2 = surf->order_v(); - // Evaluate the parametric values - RealArray gpar[2]; - getGridParameters(gpar[0],0,p1-1); - getGridParameters(gpar[1],1,p2-1); - - const int nel1 = (gpar[0].size()-1)/(p1-1); - const int nel2 = (gpar[1].size()-1)/(p2-1); + const int nel1 = (nx-1)/(p1-1); + const int nel2 = (ny-1)/(p2-1); utl::calcThreadGroups(nel1,nel2,threadGroups); } diff --git a/src/ASM/ASMs2DLag.h b/src/ASM/ASMs2DLag.h index d8b6bb4c..6cc554bd 100644 --- a/src/ASM/ASMs2DLag.h +++ b/src/ASM/ASMs2DLag.h @@ -48,9 +48,9 @@ public: virtual void clear(bool retainGeometry = false); //! \brief Returns a matrix with nodal coordinates for an element. - //! \param[in] iel Element index //! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes //! in one element + //! \param[in] iel Element index virtual bool getElementCoordinates(Matrix& X, int iel) const; //! \brief Returns a matrix with all nodal coordinates within the patch. @@ -137,15 +137,17 @@ public: virtual bool evalSolution(Matrix& sField, const Integrand& integrand, const RealArray* gpar, bool regular = true) const; -protected: - //! \brief Generate thread groups - void generateThreadGroups(); -public: //! \brief Returns the number of nodal points in each parameter direction. //! \param[out] n1 Number of nodes in first (u) direction //! \param[out] n2 Number of nodes in second (v) direction virtual bool getSize(int& n1, int& n2, int = 0) const; + //! \brief Generates element groups for multi-threading of interior integrals. + virtual void generateThreadGroups(); + + //! \brief Returns the number of elements on a boundary. + virtual size_t getNoBoundaryElms(char lIndex, char ldim) const; + private: size_t nx; //!< Number of nodes in first parameter direction size_t ny; //!< Number of nodes in second parameter direction diff --git a/src/ASM/ASMs2DSpec.C b/src/ASM/ASMs2DSpec.C index fe35db10..8f7ebe96 100644 --- a/src/ASM/ASMs2DSpec.C +++ b/src/ASM/ASMs2DSpec.C @@ -96,9 +96,6 @@ bool ASMs2DSpec::integrate (Integrand& integrand, if (!Legendre::basisDerivatives(p1,D1)) return false; if (!Legendre::basisDerivatives(p2,D2)) return false; - if (threadGroups.empty()) - generateThreadGroups(); - // === Assembly loop over all elements in the patch ========================== @@ -113,7 +110,8 @@ bool ASMs2DSpec::integrate (Integrand& integrand, int iel = threadGroups[g][t][e]+1; // Set up control point coordinates for current element - if (!this->getElementCoordinates(Xnod,iel)) { + if (!this->getElementCoordinates(Xnod,iel)) + { ok = false; break; } @@ -121,11 +119,13 @@ bool ASMs2DSpec::integrate (Integrand& integrand, // Initialize element quantities fe.iel = MLGE[iel-1]; LocalIntegral* A = integrand.getLocalIntegral(fe.N.size(),fe.iel); - if (!integrand.initElement(MNPC[iel-1],*A)) { + if (!integrand.initElement(MNPC[iel-1],*A)) + { ok = false; break; } + // --- Integration loop over all Gauss points in each direction -------- int count = 1; @@ -147,14 +147,16 @@ bool ASMs2DSpec::integrate (Integrand& integrand, // Evaluate the integrand and accumulate element contributions fe.detJxW *= wg1(i)*wg2(j); - if (!integrand.evalInt(*A,fe,time,X)) { + if (!integrand.evalInt(*A,fe,time,X)) + { ok = false; break; } } // Assembly of global system integral - if (!glInt.assemble(A->ref(),fe.iel)) { + if (!glInt.assemble(A->ref(),fe.iel)) + { ok = false; break; } @@ -235,6 +237,7 @@ bool ASMs2DSpec::integrate (Integrand& integrand, int lIndex, LocalIntegral* A = integrand.getLocalIntegral(nen,fe.iel,true); if (!integrand.initElementBou(MNPC[iel-1],*A)) return false; + // --- Integration loop over all Gauss points along the edge ------------- for (int i = 0; i < p[t2-1]; i++) diff --git a/src/ASM/ASMs2Dmx.C b/src/ASM/ASMs2Dmx.C index 7ce25789..51f5c8f5 100644 --- a/src/ASM/ASMs2Dmx.C +++ b/src/ASM/ASMs2Dmx.C @@ -506,9 +506,6 @@ bool ASMs2Dmx::integrate (Integrand& integrand, const int n1 = surf->numCoefs_u(); const int nel1 = n1 - p1 + 1; - if (threadGroups.empty()) - generateThreadGroups(); - // === Assembly loop over all elements in the patch ========================== diff --git a/src/ASM/ASMs2DmxLag.C b/src/ASM/ASMs2DmxLag.C index 80f6b14f..2dd3f324 100644 --- a/src/ASM/ASMs2DmxLag.C +++ b/src/ASM/ASMs2DmxLag.C @@ -231,9 +231,6 @@ bool ASMs2DmxLag::integrate (Integrand& integrand, const int q1 = p1 - 1; const int q2 = p2 - 1; - if (threadGroups.empty()) - generateThreadGroups(); - // === Assembly loop over all elements in the patch ========================== diff --git a/src/ASM/ASMs3D.C b/src/ASM/ASMs3D.C index 23d616db..aa045411 100644 --- a/src/ASM/ASMs3D.C +++ b/src/ASM/ASMs3D.C @@ -1025,6 +1025,8 @@ bool ASMs3D::getSize (int& n1, int& n2, int& n3, int) const size_t ASMs3D::getNoBoundaryElms (char lIndex, char ldim) const { + if (!svol) return 0; + if (ldim < 1 && lIndex > 0) return 1; else if (ldim < 2 && lIndex > 0 && lIndex <= 12) @@ -1222,9 +1224,6 @@ bool ASMs3D::integrate (Integrand& integrand, const int nel1 = n1 - p1 + 1; const int nel2 = n2 - p2 + 1; - if (threadGroupsVol.empty()) - generateThreadGroups(); - // === Assembly loop over all elements in the patch ========================== @@ -1261,10 +1260,18 @@ bool ASMs3D::integrate (Integrand& integrand, break; } - // Compute characteristic element length, if needed if (integrand.getIntegrandType() == 2) + { + // Compute characteristic element length fe.h = getElmSize(p1,p2,p3,Xnod); + // Element size in parametric space + int inod = MNPC[iel-1].back(); + dXidu[0] = svol->knotSpan(0,nodeInd[inod].I); + dXidu[1] = svol->knotSpan(1,nodeInd[inod].J); + dXidu[2] = svol->knotSpan(2,nodeInd[inod].K); + } + else if (integrand.getIntegrandType() == 3) { // --- Compute average value of basis functions over the element ----- @@ -1306,13 +1313,7 @@ bool ASMs3D::integrate (Integrand& integrand, X.z = X0[2]; } - // TODO: Do this conditionally (only when the CFD solver requires it) - int inod = MNPC[iel-1].back(); - dXidu[0] = svol->knotSpan(0,nodeInd[inod].I); - dXidu[1] = svol->knotSpan(1,nodeInd[inod].J); - dXidu[2] = svol->knotSpan(2,nodeInd[inod].K); - - // Initialize element quantities + // Initialize element quantities LocalIntegral* A = integrand.getLocalIntegral(fe.N.size(),fe.iel); if (!integrand.initElement(MNPC[iel-1],X,nRed*nRed*nRed,*A)) { @@ -1391,14 +1392,15 @@ bool ASMs3D::integrate (Integrand& integrand, // Compute Hessian of coordinate mapping and 2nd order derivatives if (integrand.getIntegrandType() == 2) + { if (!utl::Hessian(Hess,fe.d2NdX2,Jac,Xnod,d2Ndu2,dNdu)) { ok = false; break; } - // RUNAR: Do this conditionally (when the CFD solver requires it) - utl::getGmat(Jac,dXidu,fe.G); + utl::getGmat(Jac,dXidu,fe.G); + } #if SP_DEBUG > 4 std::cout <<"\niel, ip = "<< iel <<" "<< ip @@ -1418,16 +1420,16 @@ bool ASMs3D::integrate (Integrand& integrand, } } - // Finalize the element quantities - if (!integrand.finalizeElement(*A,time,firstIp+jp)) - { + // Finalize the element quantities + if (!integrand.finalizeElement(*A,time,firstIp+jp)) + { ok = false; break; } // Assembly of global system integral if (!glInt.assemble(A->ref(),fe.iel)) - { + { ok = false; break; } @@ -1449,6 +1451,15 @@ bool ASMs3D::integrate (Integrand& integrand, int lIndex, PROFILE2("ASMs3D::integrate(B)"); + std::map::const_iterator tit; + if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end()) + { + std::cerr <<" *** ASMs3D::integrate: No thread groups for face "<< lIndex + << std::endl; + return false; + } + const utl::ThreadGroups& threadGrp = tit->second; + // Get Gaussian quadrature points and weights const double* xg = GaussQuadrature::getCoord(nGauss); const double* wg = GaussQuadrature::getWeight(nGauss); @@ -1493,16 +1504,13 @@ bool ASMs3D::integrate (Integrand& integrand, int lIndex, const int nel1 = n1 - p1 + 1; const int nel2 = n2 - p2 + 1; - if (threadGroupsFace.empty()) - generateThreadGroups(); - // === Assembly loop over all elements on the patch face ===================== - bool ok=true; - for (size_t g=0;gnumCoefs(0) - svol->order(0) + 1; + const int nel2 = svol->numCoefs(1) - svol->order(1) + 1; + const int nel3 = svol->numCoefs(2) - svol->order(2) + 1; + + utl::calcThreadGroups(nel1,nel2,nel3,threadGroupsVol); + if (threadGroupsVol.size() < 2) return; + + std::cout <<"\nMultiple threads are utilized during element assembly."; + for (size_t i = 0; i < threadGroupsVol.size(); i++) + { + std::cout <<"\n Thread group "<< i+1; + for (size_t j = 0; j < threadGroupsVol[i].size(); j++) + std::cout <<"\n\tthread "<< j+1 + << ": "<< threadGroupsVol[i][j].size() <<" elements"; + } + std::cout << std::endl; +} + + +void ASMs3D::generateThreadGroups (char lIndex) +{ + if (threadGroupsFace.find(lIndex) != threadGroupsFace.end()) return; + const int p1 = svol->order(0); const int p2 = svol->order(1); const int p3 = svol->order(2); const int n1 = svol->numCoefs(0); const int n2 = svol->numCoefs(1); const int n3 = svol->numCoefs(2); - const int nel1 = n1 - p1 + 1; - const int nel2 = n2 - p2 + 1; - const int nel3 = n3 - p3 + 1; - utl::calcThreadGroups(nel1,nel2,nel3,threadGroupsVol); - - // now the faces - threadGroupsFace.resize(6); - for (int face=0;face<6;++face) { - const int faceDir = (face+2)/((face+1)%2 ? -2 : 2); - std::vector map; - int iel=0; - for (int i3 = p3; i3 <= n3; i3++) { - for (int i2 = p2; i2 <= n2; i2++) { - for (int i1 = p1; i1 <= n1; i1++, iel++) { - // Skip elements that are not on current boundary face - bool skipMe = false; - switch (faceDir) + // Find elements that are on the boundary face 'lIndex' + IntVec map; map.reserve(this->getNoBoundaryElms(lIndex,2)); + int d1, d2, iel = 0; + for (int i3 = p3; i3 <= n3; i3++) + for (int i2 = p2; i2 <= n2; i2++) + for (int i1 = p1; i1 <= n1; i1++, iel++) + switch (lIndex) { - case -1: if (i1 > p1) skipMe = true; break; - case 1: if (i1 < n1) skipMe = true; break; - case -2: if (i2 > p2) skipMe = true; break; - case 2: if (i2 < n2) skipMe = true; break; - case -3: if (i3 > p3) skipMe = true; break; - case 3: if (i3 < n3) skipMe = true; break; + case 1: if (i1 == p1) map.push_back(iel); break; + case 2: if (i1 == n1) map.push_back(iel); break; + case 3: if (i2 == p2) map.push_back(iel); break; + case 4: if (i2 == n2) map.push_back(iel); break; + case 5: if (i3 == p3) map.push_back(iel); break; + case 6: if (i3 == n3) map.push_back(iel); break; } - if (skipMe) - continue; - map.push_back(iel); - } - } + switch (lIndex) + { + case 1: + case 2: + d1 = n2 - p2 + 1; + d2 = n3 - p3 + 1; + break; + case 3: + case 4: + d1 = n1 - p1 + 1; + d2 = n3 - p3 + 1; + break; + default: + d1 = n1 - p1 + 1; + d2 = n2 - p2 + 1; } - int d1, d2; - if (face < 2) { - d1 = nel2; - d2 = nel3; - } else if (face < 4) { - d1 = nel1; - d2 = nel3; - } else { - d1 = nel1; - d2 = nel2; + utl::ThreadGroups& fGrp = threadGroupsFace[lIndex]; + utl::calcThreadGroups(d1,d2,fGrp); + utl::mapThreadGroups(fGrp,map); + + if (fGrp.size() > 1) + for (size_t i = 0; i < threadGroupsVol.size(); i++) + { + std::cout <<"\n Thread group "<< i+1 <<" for boundary face "<<(int)lIndex; + for (size_t j = 0; j < fGrp.size(); j++) + std::cout <<"\n\tthread "<< j+1 + << ": "<< fGrp[i][j].size() <<" elements"; } - utl::calcThreadGroups(d1,d2,threadGroupsFace[face]); - utl::mapThreadGroups(threadGroupsFace[face],map); - } } diff --git a/src/ASM/ASMs3D.h b/src/ASM/ASMs3D.h index 213edfd8..ff37af04 100644 --- a/src/ASM/ASMs3D.h +++ b/src/ASM/ASMs3D.h @@ -337,6 +337,7 @@ public: //! \param[in] integrand Object with problem-specific data and methods Go::SplineVolume* projectSolution(const Integrand& integrand) const; //! \brief Projects the secondary solution field onto the primary basis. + //! \param[in] integrand Object with problem-specific data and methods virtual Go::GeomObject* evalSolution(const Integrand& integrand) const; //! \brief Evaluates the secondary solution field at the given points. @@ -352,8 +353,8 @@ public: //! \a gpar[0].size() \a X \a gpar[1].size() \a X \a gpar[2].size(). //! Otherwise, we assume that it contains the \a u, \a v and \a w parameters //! directly for each sampling point. - bool evalSolution(Matrix& sField, const Integrand& integrand, - const RealArray* gpar, bool regular = true) const; + virtual bool evalSolution(Matrix& sField, const Integrand& integrand, + const RealArray* gpar, bool regular = true) const; //! \brief Calculates parameter values for visualization nodal points. //! \param[out] prm Parameter values in given direction for all points @@ -405,6 +406,12 @@ protected: //! \param[in] basis Which basis to return size parameters for (mixed methods) virtual bool getSize(int& n1, int& n2, int& n3, int basis = 0) const; + //! \brief Generates element groups for multi-threading of interior integrals. + virtual void generateThreadGroups(); + //! \brief Generates element groups for multi-threading of boundary integrals. + //! \param[in] lIndex Local index [1,6] of the boundary face + virtual void generateThreadGroups(char lIndex); + //! \brief Establishes matrices with basis functions and 1st derivatives. static void extractBasis(const Go::BasisDerivs& spline, Vector& N, Matrix& dNdu); @@ -438,13 +445,10 @@ protected: const IndexVec& nodeInd; //!< IJK-triplets for the control points (nodes) IndexVec myNodeInd; //!< The actual IJK-triplet container - //! Element groups for multithreaded volume assembly + //! Element groups for multi-threaded volume assembly utl::ThreadGroups threadGroupsVol; - //! Element groups for multithreaded face assembly - std::vector threadGroupsFace; - - //! \brief Generate thread groups - virtual void generateThreadGroups(); + //! Element groups for multi-threaded face assembly + std::map threadGroupsFace; }; #endif diff --git a/src/ASM/ASMs3DLag.C b/src/ASM/ASMs3DLag.C index a470297a..2560ef62 100644 --- a/src/ASM/ASMs3DLag.C +++ b/src/ASM/ASMs3DLag.C @@ -201,6 +201,38 @@ bool ASMs3DLag::getSize (int& n1, int& n2, int& n3, int) const } +size_t ASMs3DLag::getNoBoundaryElms (char lIndex, char ldim) const +{ + if (!svol) return 0; + + if (ldim < 1 && lIndex > 0) + return 1; + + int nel[3]; // Number of elements in each direction + nel[0] = (nx-1)/(svol->order(0)-1); + nel[1] = (ny-1)/(svol->order(1)-1); + nel[2] = (nz-1)/(svol->order(2)-1); + + if (ldim < 2 && lIndex > 0 && lIndex <= 12) + return nel[(lIndex-1)/4]; + + switch (lIndex) + { + case 1: + case 2: + return nel[1]*nel[2]; + case 3: + case 4: + return nel[0]*nel[2]; + case 5: + case 6: + return nel[0]*nel[1]; + } + + return 0; +} + + bool ASMs3DLag::integrate (Integrand& integrand, GlobalIntegral& glInt, const TimeDomain& time) @@ -235,9 +267,6 @@ bool ASMs3DLag::integrate (Integrand& integrand, const int p2 = svol->order(1); const int p3 = svol->order(2); - if (threadGroupsVol.empty()) - generateThreadGroups(); - // === Assembly loop over all elements in the patch ========================== @@ -272,7 +301,7 @@ bool ASMs3DLag::integrate (Integrand& integrand, X *= 1.0/(double)Xnod.cols(); } - // Initialize element quantities + // Initialize element quantities fe.iel = MLGE[iel-1]; LocalIntegral* A = integrand.getLocalIntegral(fe.N.size(),fe.iel); if (!integrand.initElement(MNPC[iel-1],X,nRed*nRed*nRed,*A)) @@ -367,8 +396,8 @@ bool ASMs3DLag::integrate (Integrand& integrand, } } - // Finalize the element quantities - if (!integrand.finalizeElement(*A,time,firstIp+jp)) + // Finalize the element quantities + if (!integrand.finalizeElement(*A,time,firstIp+jp)) { ok = false; break; @@ -396,6 +425,15 @@ bool ASMs3DLag::integrate (Integrand& integrand, int lIndex, { if (!svol) return true; // silently ignore empty patches + std::map::const_iterator tit; + if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end()) + { + std::cerr <<" *** ASMs3DLag::integrate: No thread groups for face "<< lIndex + << std::endl; + return false; + } + const utl::ThreadGroups& threadGrp = tit->second; + // Get Gaussian quadrature points and weights const double* xg = GaussQuadrature::getCoord(nGauss); const double* wg = GaussQuadrature::getWeight(nGauss); @@ -430,16 +468,13 @@ bool ASMs3DLag::integrate (Integrand& integrand, int lIndex, if (vpar.empty()) this->getGridParameters(vpar,1,1); if (wpar.empty()) this->getGridParameters(wpar,2,1); - if (threadGroupsFace.empty()) - generateThreadGroups(); - // === Assembly loop over all elements on the patch face ===================== - bool ok=true; - for (size_t g=0;gref(),fe.iel)) + // Assembly of global system integral + if (!glInt.assemble(A->ref(),fe.iel)) { ok = false; break; @@ -840,63 +875,64 @@ bool ASMs3DLag::evalSolution (Matrix& sField, const Integrand& integrand, } -void ASMs3DLag::generateThreadGroups() +void ASMs3DLag::generateThreadGroups () { const int p1 = svol->order(0); const int p2 = svol->order(1); const int p3 = svol->order(2); - // Evaluate the parametric values - RealArray gpar[3]; - getGridParameters(gpar[0],0,p1-1); - getGridParameters(gpar[1],1,p2-1); - getGridParameters(gpar[2],2,p3-1); - - const int nel1 = (gpar[0].size()-1)/(p1-1); - const int nel2 = (gpar[1].size()-1)/(p2-1); - const int nel3 = (gpar[2].size()-1)/(p3-1); + const int nel1 = (nx-1)/(p1-1); + const int nel2 = (ny-1)/(p2-1); + const int nel3 = (nz-1)/(p3-1); utl::calcThreadGroups(nel1,nel2,nel3,threadGroupsVol); - - // now the faces - threadGroupsFace.resize(6); - for (int face=0;face<6;++face) { - const int faceDir = (face+2)/((face+1)%2 ? -2 : 2); - std::vector map; - int iel=0; - for (int i3 = 0; i3 < nel3; i3++) { - for (int i2 = 0; i2 < nel2; i2++) { - for (int i1 = 0; i1 < nel1; i1++, iel++) { - // Skip elements that are not on current boundary face - bool skipMe = false; - switch (faceDir) - { - case -1: if (i1 > 0) skipMe = true; break; - case 1: if (i1 < nel1-1) skipMe = true; break; - case -2: if (i2 > 0) skipMe = true; break; - case 2: if (i2 < nel2-1) skipMe = true; break; - case -3: if (i3 > 0) skipMe = true; break; - case 3: if (i3 < nel3-1) skipMe = true; break; - } - if (skipMe) - continue; - - map.push_back(iel); - } - } - } - int d1, d2; - if (face < 2) { - d1 = nel2; - d2 = nel3; - } else if (face < 4) { - d1 = nel1; - d2 = nel3; - } else { - d1 = nel1; - d2 = nel2; - } - utl::calcThreadGroups(d1,d2,threadGroupsFace[face]); - utl::mapThreadGroups(threadGroupsFace[face],map); - } +} + + +void ASMs3DLag::generateThreadGroups (char lIndex) +{ + if (threadGroupsFace.find(lIndex) != threadGroupsFace.end()) return; + + const int p1 = svol->order(0); + const int p2 = svol->order(1); + const int p3 = svol->order(2); + const int n1 = (nx-1)/(p1-1); + const int n2 = (ny-1)/(p2-1); + const int n3 = (nz-1)/(p3-1); + + // Find elements that are on the boundary face 'lIndex' + IntVec map; map.reserve(this->getNoBoundaryElms(lIndex,2)); + int d1, d2, iel = 0; + for (int i3 = 1; i3 <= n3; i3++) + for (int i2 = 1; i2 <= n2; i2++) + for (int i1 = 1; i1 <= n1; i1++, iel++) + switch (lIndex) + { + case 1: if (i1 == 1) map.push_back(iel); break; + case 2: if (i1 == n1) map.push_back(iel); break; + case 3: if (i2 == 1) map.push_back(iel); break; + case 4: if (i2 == n2) map.push_back(iel); break; + case 5: if (i3 == 1) map.push_back(iel); break; + case 6: if (i3 == n3) map.push_back(iel); break; + } + + switch (lIndex) + { + case 1: + case 2: + d1 = n2; + d2 = n3; + break; + case 3: + case 4: + d1 = n1; + d2 = n3; + break; + default: + d1 = n1; + d2 = n2; + } + + utl::calcThreadGroups(d1,d2,threadGroupsFace[lIndex]); + utl::mapThreadGroups(threadGroupsFace[lIndex],map); } diff --git a/src/ASM/ASMs3DLag.h b/src/ASM/ASMs3DLag.h index 003ce70a..aea305ff 100644 --- a/src/ASM/ASMs3DLag.h +++ b/src/ASM/ASMs3DLag.h @@ -47,6 +47,17 @@ public: //! This is used to reinitialize the patch after it has been refined. virtual void clear(bool retainGeometry = false); + //! \brief Returns a matrix with nodal coordinates for an element. + //! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes + //! in one element + //! \param[in] iel Element index + virtual bool getElementCoordinates(Matrix& X, int iel) const; + + //! \brief Returns a matrix with all nodal coordinates within the patch. + //! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes + //! in the patch + virtual void getNodalCoordinates(Matrix& X) const; + //! \brief Returns the global coordinates for the given node. //! \param[in] inod 1-based node index local to current patch virtual Vec3 getCoord(size_t inod) const; @@ -134,26 +145,21 @@ public: virtual bool evalSolution(Matrix& sField, const Integrand& integrand, const RealArray* gpar, bool regular = true) const; - //! \brief Returns a matrix with nodal coordinates for an element. - //! \param[in] iel Element index - //! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes - //! in one element - virtual bool getElementCoordinates(Matrix& X, int iel) const; - - //! \brief Generate thread groups - void generateThreadGroups(); -public: - //! \brief Returns a matrix with all nodal coordinates within the patch. - //! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes - //! in the patch - virtual void getNodalCoordinates(Matrix& X) const; - //! \brief Returns the number of nodal points in each parameter direction. //! \param[out] n1 Number of nodes in first (u) direction //! \param[out] n2 Number of nodes in second (v) direction //! \param[out] n3 Number of nodes in third (w) direction virtual bool getSize(int& n1, int& n2, int& n3, int = 0) const; + //! \brief Generates element groups for multi-threading of interior integrals. + virtual void generateThreadGroups(); + //! \brief Generates element groups for multi-threading of boundary integrals. + //! \param[in] lIndex Local index [1,6] of the boundary face + virtual void generateThreadGroups(char lIndex); + + //! \brief Returns the number of elements on a boundary. + virtual size_t getNoBoundaryElms(char lIndex, char ldim) const; + private: size_t nx; //!< Number of nodes in first parameter direction size_t ny; //!< Number of nodes in second parameter direction diff --git a/src/ASM/ASMs3DSpec.C b/src/ASM/ASMs3DSpec.C index a380ab6c..184962ac 100644 --- a/src/ASM/ASMs3DSpec.C +++ b/src/ASM/ASMs3DSpec.C @@ -101,9 +101,6 @@ bool ASMs3DSpec::integrate (Integrand& integrand, if (!Legendre::basisDerivatives(p2,D2)) return false; if (!Legendre::basisDerivatives(p3,D3)) return false; - if (threadGroupsVol.empty()) - generateThreadGroups(); - // === Assembly loop over all elements in the patch ========================== @@ -133,6 +130,7 @@ bool ASMs3DSpec::integrate (Integrand& integrand, break; } + // --- Integration loop over all Gauss points in each direction -------- int count = 1; @@ -185,6 +183,15 @@ bool ASMs3DSpec::integrate (Integrand& integrand, int lIndex, { if (!svol) return true; // silently ignore empty patches + std::map::const_iterator tit; + if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end()) + { + std::cerr <<" *** ASMs3DSpec::integrate: No thread groups for face "<second; + // Find the parametric direction of the face normal {-3,-2,-1, 1, 2, 3} const int faceDir = (lIndex+1)/(lIndex%2 ? -2 : 2); @@ -212,33 +219,38 @@ bool ASMs3DSpec::integrate (Integrand& integrand, int lIndex, int nen = p[0]*p[1]*p[2]; + // === Assembly loop over all elements on the patch face ===================== - bool ok=true; - for (size_t g=0;ggetElementCoordinates(Xnod,++iel)) { + if (!this->getElementCoordinates(Xnod,++iel)) + { ok = false; break; } // Initialize element quantities - fe.iel = MLGE[iel-1]; + fe.iel = MLGE[iel-1]; LocalIntegral* A = integrand.getLocalIntegral(nen,fe.iel,true); - if (!integrand.initElementBou(MNPC[iel-1],*A)) { + if (!integrand.initElementBou(MNPC[iel-1],*A)) + { ok = false; break; } + // --- Integration loop over all Gauss points in each direction -------- for (int j = 0; j < p[t2-1]; j++) @@ -265,14 +277,16 @@ bool ASMs3DSpec::integrate (Integrand& integrand, int lIndex, // Evaluate the integrand and accumulate element contributions fe.detJxW *= wg[t1-1][i]*wg[t2-1][j]; - if (!integrand.evalBou(*A,fe,time,X,normal)) { + if (!integrand.evalBou(*A,fe,time,X,normal)) + { ok = false; break; } } // Assembly of global system integral - if (!glInt.assemble(A->ref(),fe.iel)) { + if (!glInt.assemble(A->ref(),fe.iel)) + { ok = false; break; } @@ -379,6 +393,7 @@ bool ASMs3DSpec::integrateEdge (Integrand& integrand, int lEdge, LocalIntegral* A = integrand.getLocalIntegral(nen,fe.iel,true); if (!integrand.initElementBou(MNPC[iel-1],*A)) return false; + // --- Integration loop over all Gauss points along the edge ----------- for (int i = 0; i < pe; i++) diff --git a/src/ASM/ASMs3Dmx.C b/src/ASM/ASMs3Dmx.C index 824458dc..035106fd 100644 --- a/src/ASM/ASMs3Dmx.C +++ b/src/ASM/ASMs3Dmx.C @@ -538,9 +538,6 @@ bool ASMs3Dmx::integrate (Integrand& integrand, const int nel2 = n2 - p2 + 1; const int nel3 = n3 - p3 + 1; - if (threadGroupsVol.empty()) - generateThreadGroups(); - // === Assembly loop over all elements in the patch ========================== @@ -664,6 +661,15 @@ bool ASMs3Dmx::integrate (Integrand& integrand, int lIndex, PROFILE2("ASMs3Dmx::integrate(B)"); + std::map::const_iterator tit; + if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end()) + { + std::cerr <<" *** ASMs3D::integrate: No thread groups for face "<< lIndex + << std::endl; + return false; + } + const utl::ThreadGroups& threadGrp = tit->second; + // Get Gaussian quadrature points and weights const double* xg = GaussQuadrature::getCoord(nGauss); const double* wg = GaussQuadrature::getWeight(nGauss); @@ -706,16 +712,13 @@ bool ASMs3Dmx::integrate (Integrand& integrand, int lIndex, const int nel1 = n1 - p1 + 1; const int nel2 = n2 - p2 + 1; - if (threadGroupsFace.empty()) - generateThreadGroups(); - // === Assembly loop over all elements on the patch face ===================== - bool ok=true; - for (size_t g=0;gorder(0)*basis1->order(1)*basis1->order(2), basis2->order(0)*basis2->order(1)*basis2->order(2)); fe.xi = fe.eta = fe.zeta = faceDir < 0 ? -1.0 : 1.0; @@ -726,8 +729,8 @@ bool ASMs3Dmx::integrate (Integrand& integrand, int lIndex, Matrix dN1du, dN2du, Xnod, Jac; Vec4 X; Vec3 normal; - for (size_t l=0;l::const_iterator tit; + if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end()) + { + std::cerr <<" *** ASMs3DLag::integrate: No thread groups for face "<< lIndex + << std::endl; + return false; + } + const utl::ThreadGroups& threadGrp = tit->second; + // Get Gaussian quadrature points and weights const double* xg = GaussQuadrature::getCoord(nGauss); const double* wg = GaussQuadrature::getWeight(nGauss); @@ -389,23 +395,20 @@ bool ASMs3DmxLag::integrate (Integrand& integrand, int lIndex, const int nel1 = (nx2-1)/(p1-1); const int nel2 = (ny2-1)/(p2-1); - if (threadGroupsFace.empty()) - generateThreadGroups(); - // === Assembly loop over all elements on the patch face ===================== - bool ok=true; - for (size_t g=0;g& ignored, bool fixDup) std::cout <<"\nResolving Dirichlet boundary conditions"<< std::endl; bool ok = true; - for (PropertyVec::const_iterator p = myProps.begin(); p != myProps.end(); p++) - switch (p->pcode) { + PropertyVec::const_iterator q; + for (q = myProps.begin(); q != myProps.end(); q++) + switch (q->pcode) { case Property::DIRICHLET: - if (this->addConstraint(p->patch,p->lindx,p->ldim,p->pindx)) + if (this->addConstraint(q->patch,q->lindx,q->ldim,q->pindx)) std::cout << std::endl; else ok = false; break; case Property::DIRICHLET_INHOM: - if (this->addConstraint(p->patch,p->lindx,p->ldim,p->pindx%1000,p->pindx)) + if (this->addConstraint(q->patch,q->lindx,q->ldim,q->pindx%1000,q->pindx)) std::cout << std::endl; else ok = false; @@ -700,6 +701,15 @@ bool SIMbase::preprocess (const std::vector& ignored, bool fixDup) // Resolve possibly chaining of the MPC equations if (!allMPCs.empty()) ASMbase::resolveMPCchains(allMPCs); + // Generate element groups for multi-threading + for (mit = myModel.begin(); mit != myModel.end(); mit++) + (*mit)->generateThreadGroups(); + for (q = myProps.begin(); q != myProps.end(); q++) + if (q->pcode == Property::NEUMANN) + if (q->patch > 0 && q->patch <= myModel.size()) + if (q->ldim+1 == myModel[q->patch-1]->getNoParamDim()) + myModel[q->patch-1]->generateThreadGroups(q->lindx); + // Preprocess the result points for (ResPointVec::iterator p = myPoints.begin(); p != myPoints.end();) {