Changed: Moved generation of thread groups to SIMbase::preprocess. For volume faces, only generate thread groups for those subjected to boundary integrals (i.e., the Neumann boundaries).

git-svn-id: http://svn.sintef.no/trondheim/IFEM/trunk@1431 e10b68d5-8a6e-419e-a041-bce267b0401d
2012-01-24 12:33:36 +00:00 · 2012-01-24 12:33:36 +00:00 · 8a33cad300
commit 8a33cad300
parent 3b65885640
16 changed files with 376 additions and 250 deletions
--- a/src/ASM/ASMbase.h
+++ b/src/ASM/ASMbase.h
@ -252,6 +252,11 @@ public:
  //! \brief Computes the number of boundary integration points in this patch.
  virtual void getNoBouPoints(size_t& nPt, char ldim, char lindx) = 0;

+  //! \brief Generates element groups for multi-threading of interior integrals.
+  virtual void generateThreadGroups() {}
+  //! \brief Generates element groups for multi-threading of boundary integrals.
+  virtual void generateThreadGroups(char) {}
+

  // Methods for integration of finite element quantities.
  // These are the main computational methods of the ASM class hierarchy.
--- a/src/ASM/ASMs2D.C
+++ b/src/ASM/ASMs2D.C
@ -1008,9 +1008,6 @@ bool ASMs2D::integrate (Integrand& integrand,
  const int n1 = surf->numCoefs_u();
  const int nel1 = n1 - p1 + 1;

-  if (threadGroups.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements in the patch ==========================

@ -1046,10 +1043,17 @@ bool ASMs2D::integrate (Integrand& integrand,
          break;
        }

-        // Compute characteristic element length, if needed
        if (integrand.getIntegrandType() == 2)
+        {
+          // Compute characteristic element length
          fe.h = getElmSize(p1,p2,Xnod);

+          // Element size in parametric space
+          int inod = MNPC[iel-1].back();
+          dXidu[0] = surf->knotSpan(0,nodeInd[inod].I);
+          dXidu[1] = surf->knotSpan(1,nodeInd[inod].J);
+        }
+
        else if (integrand.getIntegrandType() == 3)
        {
          // --- Compute average value of basis functions over the element -----
@ -1088,11 +1092,6 @@ bool ASMs2D::integrate (Integrand& integrand,
            X[i] = X0[i];
        }

-        // TODO: Do this conditionally (only when the CFD solver requires it)
-        int inod = MNPC[iel-1].back();
-        dXidu[0] = surf->knotSpan(0,nodeInd[inod].I);
-        dXidu[1] = surf->knotSpan(1,nodeInd[inod].J);
-
        // Initialize element quantities
        LocalIntegral* A = integrand.getLocalIntegral(fe.N.size(),fe.iel);
        if (!integrand.initElement(MNPC[iel-1],X,nRed*nRed,*A))
@ -1167,14 +1166,15 @@ bool ASMs2D::integrate (Integrand& integrand,

            // Compute Hessian of coordinate mapping and 2nd order derivatives
            if (integrand.getIntegrandType() == 2)
+            {
              if (!utl::Hessian(Hess,fe.d2NdX2,Jac,Xnod,d2Ndu2,dNdu))
              {
                ok = false;
                break;
              }

-	    // RUNAR: Do this conditionally (when the CFD solver requires it)
-	    utl::getGmat(Jac,dXidu,fe.G);
+              utl::getGmat(Jac,dXidu,fe.G);
+            }

 #if SP_DEBUG > 4
            std::cout <<"\niel, ip = "<< iel <<" "<< ip
@ -1716,23 +1716,26 @@ bool ASMs2D::evalSolution (Matrix& sField, const Integrand& integrand,
 }


-void ASMs2D::generateThreadGroups()
+void ASMs2D::generateThreadGroups ()
 {
  const int p1 = surf->order_u();
  const int p2 = surf->order_v();
  const int n1 = surf->numCoefs_u();
  const int n2 = surf->numCoefs_v();
+
  const int nel1 = n1 - p1 + 1;
  const int nel2 = n2 - p2 + 1;

  utl::calcThreadGroups(nel1,nel2,threadGroups);
+  if (threadGroups.size() < 2) return;

-  if (threadGroups.size() > 1)
-    for (size_t i = 0; i < threadGroups.size(); i++) {
-      std::cout <<" Thread group "<< i+1;
-      for (size_t j = 0; j < threadGroups[i].size(); j++)
-	std::cout <<"\n\t thread "<< j+1
-		  << ": "<< threadGroups[i][j].size() <<" elements";
-      std::cout << std::endl;
-    }
+  std::cout <<"\nMultiple threads are utilized during element assembly.";
+  for (size_t i = 0; i < threadGroups.size(); i++)
+  {
+    std::cout <<"\n Thread group "<< i+1;
+    for (size_t j = 0; j < threadGroups[i].size(); j++)
+      std::cout <<"\n\tthread "<< j+1
+		<< ": "<< threadGroups[i][j].size() <<" elements";
+  }
+  std::cout << std::endl;
 }
--- a/src/ASM/ASMs2D.h
+++ b/src/ASM/ASMs2D.h
@ -101,7 +101,7 @@ public:
  //! This is used to reinitialize the patch after it has been refined.
  virtual void clear(bool retainGeometry = false);

-    //! \brief Returns a matrix with nodal coordinates for an element.
+  //! \brief Returns a matrix with nodal coordinates for an element.
  //! \param[in] iel Element index
  //! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes
  //! in one element
@ -346,6 +346,9 @@ protected:
  //! \param[in] basis Which basis to return size parameters for (mixed methods)
  virtual bool getSize(int& n1, int& n2, int basis = 0) const;

+  //! \brief Generates element groups for multi-threading of interior integrals.
+  virtual void generateThreadGroups();
+
  //! \brief Establishes matrices with basis functions and 1st derivatives.
  static void extractBasis(const Go::BasisDerivsSf& spline,
 			   Vector& N, Matrix& dNdu);
@ -377,11 +380,8 @@ protected:
  const IndexVec& nodeInd; //!< IJ-pairs for the control points (nodes)
  IndexVec      myNodeInd; //!< The actual IJ-pair container

-  //! Element groups for multithreaded assembly
+  //! Element groups for multi-threaded assembly
  utl::ThreadGroups threadGroups;
-
-  //! \brief Generate thread groups
-  virtual void generateThreadGroups();
 };

 #endif
--- a/src/ASM/ASMs2DLag.C
+++ b/src/ASM/ASMs2DLag.C
@ -187,6 +187,27 @@ bool ASMs2DLag::getSize (int& n1, int& n2, int) const
 }


+size_t ASMs2DLag::getNoBoundaryElms (char lIndex, char ldim) const
+{
+  if (!surf) return 0;
+
+  if (ldim < 1 && lIndex > 0)
+    return 1;
+
+  switch (lIndex)
+    {
+    case 1:
+    case 2:
+      return (ny-1)/(surf->order_v()-1);
+    case 3:
+    case 4:
+      return (nx-1)/(surf->order_u()-1);
+    }
+
+  return 0;
+}
+
+
 bool ASMs2DLag::integrate (Integrand& integrand,
 			   GlobalIntegral& glInt,
 			   const TimeDomain& time)
@ -218,9 +239,6 @@ bool ASMs2DLag::integrate (Integrand& integrand,
  const int p1 = surf->order_u();
  const int p2 = surf->order_v();

-  if (threadGroups.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements in the patch ==========================

@ -631,18 +649,13 @@ bool ASMs2DLag::evalSolution (Matrix& sField, const Integrand& integrand,
 }


-void ASMs2DLag::generateThreadGroups()
+void ASMs2DLag::generateThreadGroups ()
 {
  const int p1 = surf->order_u();
  const int p2 = surf->order_v();

-  // Evaluate the parametric values
-  RealArray gpar[2];
-  getGridParameters(gpar[0],0,p1-1);
-  getGridParameters(gpar[1],1,p2-1);
-
-  const int nel1 = (gpar[0].size()-1)/(p1-1);
-  const int nel2 = (gpar[1].size()-1)/(p2-1);
+  const int nel1 = (nx-1)/(p1-1);
+  const int nel2 = (ny-1)/(p2-1);

  utl::calcThreadGroups(nel1,nel2,threadGroups);
 }
--- a/src/ASM/ASMs2DLag.h
+++ b/src/ASM/ASMs2DLag.h
@ -48,9 +48,9 @@ public:
  virtual void clear(bool retainGeometry = false);

  //! \brief Returns a matrix with nodal coordinates for an element.
-  //! \param[in] iel Element index
  //! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes
  //! in one element
+  //! \param[in] iel Element index
  virtual bool getElementCoordinates(Matrix& X, int iel) const;

  //! \brief Returns a matrix with all nodal coordinates within the patch.
@ -137,15 +137,17 @@ public:
  virtual bool evalSolution(Matrix& sField, const Integrand& integrand,
 			    const RealArray* gpar, bool regular = true) const;

-protected:
-  //! \brief Generate thread groups
-  void generateThreadGroups();
-public:
  //! \brief Returns the number of nodal points in each parameter direction.
  //! \param[out] n1 Number of nodes in first (u) direction
  //! \param[out] n2 Number of nodes in second (v) direction
  virtual bool getSize(int& n1, int& n2, int = 0) const;

+  //! \brief Generates element groups for multi-threading of interior integrals.
+  virtual void generateThreadGroups();
+
+  //! \brief Returns the number of elements on a boundary.
+  virtual size_t getNoBoundaryElms(char lIndex, char ldim) const;
+
 private:
  size_t nx; //!< Number of nodes in first parameter direction
  size_t ny; //!< Number of nodes in second parameter direction
--- a/src/ASM/ASMs2DSpec.C
+++ b/src/ASM/ASMs2DSpec.C
@ -96,9 +96,6 @@ bool ASMs2DSpec::integrate (Integrand& integrand,
  if (!Legendre::basisDerivatives(p1,D1)) return false;
  if (!Legendre::basisDerivatives(p2,D2)) return false;

-  if (threadGroups.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements in the patch ==========================

@ -113,7 +110,8 @@ bool ASMs2DSpec::integrate (Integrand& integrand,
        int iel = threadGroups[g][t][e]+1;

        // Set up control point coordinates for current element
-        if (!this->getElementCoordinates(Xnod,iel)) {
+        if (!this->getElementCoordinates(Xnod,iel))
+        {
          ok = false;
          break;
        }
@ -121,11 +119,13 @@ bool ASMs2DSpec::integrate (Integrand& integrand,
        // Initialize element quantities
        fe.iel = MLGE[iel-1];
        LocalIntegral* A = integrand.getLocalIntegral(fe.N.size(),fe.iel);
-        if (!integrand.initElement(MNPC[iel-1],*A)) {
+        if (!integrand.initElement(MNPC[iel-1],*A))
+        {
          ok = false;
          break;
        }

+
        // --- Integration loop over all Gauss points in each direction --------

        int count = 1;
@ -147,14 +147,16 @@ bool ASMs2DSpec::integrate (Integrand& integrand,

            // Evaluate the integrand and accumulate element contributions
            fe.detJxW *= wg1(i)*wg2(j);
-            if (!integrand.evalInt(*A,fe,time,X)) {
+            if (!integrand.evalInt(*A,fe,time,X))
+            {
              ok = false;
              break;
            }
          }

        // Assembly of global system integral
-        if (!glInt.assemble(A->ref(),fe.iel)) {
+        if (!glInt.assemble(A->ref(),fe.iel))
+        {
          ok = false;
          break;
        }
@ -235,6 +237,7 @@ bool ASMs2DSpec::integrate (Integrand& integrand, int lIndex,
      LocalIntegral* A = integrand.getLocalIntegral(nen,fe.iel,true);
      if (!integrand.initElementBou(MNPC[iel-1],*A)) return false;

+
      // --- Integration loop over all Gauss points along the edge -------------

      for (int i = 0; i < p[t2-1]; i++)
--- a/src/ASM/ASMs2Dmx.C
+++ b/src/ASM/ASMs2Dmx.C
@ -506,9 +506,6 @@ bool ASMs2Dmx::integrate (Integrand& integrand,
  const int n1 = surf->numCoefs_u();
  const int nel1 = n1 - p1 + 1;

-  if (threadGroups.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements in the patch ==========================

--- a/src/ASM/ASMs2DmxLag.C
+++ b/src/ASM/ASMs2DmxLag.C
@ -231,9 +231,6 @@ bool ASMs2DmxLag::integrate (Integrand& integrand,
  const int q1 = p1 - 1;
  const int q2 = p2 - 1;

-  if (threadGroups.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements in the patch ==========================

--- a/src/ASM/ASMs3D.C
+++ b/src/ASM/ASMs3D.C
@ -1025,6 +1025,8 @@ bool ASMs3D::getSize (int& n1, int& n2, int& n3, int) const

 size_t ASMs3D::getNoBoundaryElms (char lIndex, char ldim) const
 {
+  if (!svol) return 0;
+
  if (ldim < 1 && lIndex > 0)
    return 1;
  else if (ldim < 2 && lIndex > 0 && lIndex <= 12)
@ -1222,9 +1224,6 @@ bool ASMs3D::integrate (Integrand& integrand,
  const int nel1 = n1 - p1 + 1;
  const int nel2 = n2 - p2 + 1;

-  if (threadGroupsVol.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements in the patch ==========================

@ -1261,10 +1260,18 @@ bool ASMs3D::integrate (Integrand& integrand,
          break;
        }

-        // Compute characteristic element length, if needed
        if (integrand.getIntegrandType() == 2)
+        {
+          // Compute characteristic element length
          fe.h = getElmSize(p1,p2,p3,Xnod);

+          // Element size in parametric space
+          int inod = MNPC[iel-1].back();
+          dXidu[0] = svol->knotSpan(0,nodeInd[inod].I);
+          dXidu[1] = svol->knotSpan(1,nodeInd[inod].J);
+          dXidu[2] = svol->knotSpan(2,nodeInd[inod].K);
+        }
+
        else if (integrand.getIntegrandType() == 3)
        {
          // --- Compute average value of basis functions over the element -----
@ -1306,13 +1313,7 @@ bool ASMs3D::integrate (Integrand& integrand,
          X.z = X0[2];
        }

-	// TODO: Do this conditionally (only when the CFD solver requires it)
-	int inod = MNPC[iel-1].back();
-	dXidu[0] = svol->knotSpan(0,nodeInd[inod].I);
-	dXidu[1] = svol->knotSpan(1,nodeInd[inod].J);
-	dXidu[2] = svol->knotSpan(2,nodeInd[inod].K);
-
-	// Initialize element quantities
+        // Initialize element quantities
        LocalIntegral* A = integrand.getLocalIntegral(fe.N.size(),fe.iel);
        if (!integrand.initElement(MNPC[iel-1],X,nRed*nRed*nRed,*A))
        {
@ -1391,14 +1392,15 @@ bool ASMs3D::integrate (Integrand& integrand,

              // Compute Hessian of coordinate mapping and 2nd order derivatives
              if (integrand.getIntegrandType() == 2)
+              {
                if (!utl::Hessian(Hess,fe.d2NdX2,Jac,Xnod,d2Ndu2,dNdu))
                {
                  ok = false;
                  break;
                }

-              // RUNAR: Do this conditionally (when the CFD solver requires it)
-              utl::getGmat(Jac,dXidu,fe.G);
+                utl::getGmat(Jac,dXidu,fe.G);
+              }

 #if SP_DEBUG > 4
              std::cout <<"\niel, ip = "<< iel <<" "<< ip
@ -1418,16 +1420,16 @@ bool ASMs3D::integrate (Integrand& integrand,
              }
            }

-	// Finalize the element quantities
-	if (!integrand.finalizeElement(*A,time,firstIp+jp))
-	{
+        // Finalize the element quantities
+        if (!integrand.finalizeElement(*A,time,firstIp+jp))
+        {
          ok = false;
          break;
        }

        // Assembly of global system integral
        if (!glInt.assemble(A->ref(),fe.iel))
-	{
+        {
          ok = false;
          break;
        }
@ -1449,6 +1451,15 @@ bool ASMs3D::integrate (Integrand& integrand, int lIndex,

  PROFILE2("ASMs3D::integrate(B)");

+  std::map<char,utl::ThreadGroups>::const_iterator tit;
+  if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end())
+  {
+    std::cerr <<" *** ASMs3D::integrate: No thread groups for face "<< lIndex
+	      << std::endl;
+    return false;
+  }
+  const utl::ThreadGroups& threadGrp = tit->second;
+
  // Get Gaussian quadrature points and weights
  const double* xg = GaussQuadrature::getCoord(nGauss);
  const double* wg = GaussQuadrature::getWeight(nGauss);
@ -1493,16 +1504,13 @@ bool ASMs3D::integrate (Integrand& integrand, int lIndex,
  const int nel1 = n1 - p1 + 1;
  const int nel2 = n2 - p2 + 1;

-  if (threadGroupsFace.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements on the patch face =====================

-  bool ok=true;
-  for (size_t g=0;g<threadGroupsFace[lIndex-1].size() && ok;++g) {
+  bool ok = true;
+  for (size_t g = 0; g < threadGrp.size() && ok; ++g) {
 #pragma omp parallel for schedule(static)
-    for (size_t t=0;t<threadGroupsFace[lIndex-1][g].size();++t) {
+    for (size_t t = 0; t < threadGrp[g].size(); ++t) {
      FiniteElement fe(p1*p2*p3);
      fe.xi = fe.eta = fe.zeta = faceDir < 0 ? -1.0 : 1.0;
      fe.u = gpar[0](1,1);
@ -1512,8 +1520,8 @@ bool ASMs3D::integrate (Integrand& integrand, int lIndex,
      Matrix dNdu, Xnod, Jac;
      Vec4   X;
      Vec3   normal;
-      for (size_t l=0;l<threadGroupsFace[lIndex-1][g][t].size();++l) {
-        int iel = threadGroupsFace[lIndex-1][g][t][l];
+      for (size_t l = 0; l < threadGrp[g][t].size(); ++l) {
+        int iel = threadGrp[g][t][l];
        fe.iel = MLGE[iel];
        if (fe.iel < 1) continue; // zero-volume element

@ -2170,60 +2178,81 @@ bool ASMs3D::evalSolution (Matrix& sField, const Integrand& integrand,
 }


-void ASMs3D::generateThreadGroups()
+void ASMs3D::generateThreadGroups ()
 {
+  const int nel1 = svol->numCoefs(0) - svol->order(0) + 1;
+  const int nel2 = svol->numCoefs(1) - svol->order(1) + 1;
+  const int nel3 = svol->numCoefs(2) - svol->order(2) + 1;
+
+  utl::calcThreadGroups(nel1,nel2,nel3,threadGroupsVol);
+  if (threadGroupsVol.size() < 2) return;
+
+  std::cout <<"\nMultiple threads are utilized during element assembly.";
+  for (size_t i = 0; i < threadGroupsVol.size(); i++)
+  {
+    std::cout <<"\n Thread group "<< i+1;
+    for (size_t j = 0; j < threadGroupsVol[i].size(); j++)
+      std::cout <<"\n\tthread "<< j+1
+		<< ": "<< threadGroupsVol[i][j].size() <<" elements";
+  }
+  std::cout << std::endl;
+}
+
+
+void ASMs3D::generateThreadGroups (char lIndex)
+{
+  if (threadGroupsFace.find(lIndex) != threadGroupsFace.end()) return;
+
  const int p1 = svol->order(0);
  const int p2 = svol->order(1);
  const int p3 = svol->order(2);
  const int n1 = svol->numCoefs(0);
  const int n2 = svol->numCoefs(1);
  const int n3 = svol->numCoefs(2);
-  const int nel1 = n1 - p1 + 1;
-  const int nel2 = n2 - p2 + 1;
-  const int nel3 = n3 - p3 + 1;

-  utl::calcThreadGroups(nel1,nel2,nel3,threadGroupsVol);
-
-  // now the faces
-  threadGroupsFace.resize(6);
-  for (int face=0;face<6;++face) {
-    const int faceDir = (face+2)/((face+1)%2 ? -2 : 2);
-    std::vector<int> map;
-    int iel=0;
-    for (int i3 = p3; i3 <= n3; i3++) {
-      for (int i2 = p2; i2 <= n2; i2++) {
-        for (int i1 = p1; i1 <= n1; i1++, iel++) {
-          // Skip elements that are not on current boundary face
-          bool skipMe = false;
-          switch (faceDir)
+  // Find elements that are on the boundary face 'lIndex'
+  IntVec map; map.reserve(this->getNoBoundaryElms(lIndex,2));
+  int d1, d2, iel = 0;
+  for (int i3 = p3; i3 <= n3; i3++)
+    for (int i2 = p2; i2 <= n2; i2++)
+      for (int i1 = p1; i1 <= n1; i1++, iel++)
+	switch (lIndex)
          {
-            case -1: if (i1 > p1) skipMe = true; break;
-            case  1: if (i1 < n1) skipMe = true; break;
-            case -2: if (i2 > p2) skipMe = true; break;
-            case  2: if (i2 < n2) skipMe = true; break;
-            case -3: if (i3 > p3) skipMe = true; break;
-            case  3: if (i3 < n3) skipMe = true; break;
+	  case 1: if (i1 == p1) map.push_back(iel); break;
+	  case 2: if (i1 == n1) map.push_back(iel); break;
+	  case 3: if (i2 == p2) map.push_back(iel); break;
+	  case 4: if (i2 == n2) map.push_back(iel); break;
+	  case 5: if (i3 == p3) map.push_back(iel); break;
+	  case 6: if (i3 == n3) map.push_back(iel); break;
          }
-          if (skipMe)
-            continue;

-          map.push_back(iel);
-        }
-      }
+  switch (lIndex)
+    {
+    case 1:
+    case 2:
+      d1 = n2 - p2 + 1;
+      d2 = n3 - p3 + 1;
+      break;
+    case 3:
+    case 4:
+      d1 = n1 - p1 + 1;
+      d2 = n3 - p3 + 1;
+      break;
+    default:
+      d1 = n1 - p1 + 1;
+      d2 = n2 - p2 + 1;
    }

-    int d1, d2;
-    if (face < 2) {
-      d1 = nel2;
-      d2 = nel3;
-    } else if (face < 4) {
-      d1 = nel1;
-      d2 = nel3;
-    } else {
-      d1 = nel1;
-      d2 = nel2;
+  utl::ThreadGroups& fGrp = threadGroupsFace[lIndex];
+  utl::calcThreadGroups(d1,d2,fGrp);
+  utl::mapThreadGroups(fGrp,map);
+
+  if (fGrp.size() > 1)
+    for (size_t i = 0; i < threadGroupsVol.size(); i++)
+    {
+      std::cout <<"\n Thread group "<< i+1 <<" for boundary face "<<(int)lIndex;
+      for (size_t j = 0; j < fGrp.size(); j++)
+	std::cout <<"\n\tthread "<< j+1
+		  << ": "<< fGrp[i][j].size() <<" elements";
    }
-    utl::calcThreadGroups(d1,d2,threadGroupsFace[face]);
-    utl::mapThreadGroups(threadGroupsFace[face],map);
-  }
 }
--- a/src/ASM/ASMs3D.h
+++ b/src/ASM/ASMs3D.h
@ -337,6 +337,7 @@ public:
  //! \param[in] integrand Object with problem-specific data and methods
  Go::SplineVolume* projectSolution(const Integrand& integrand) const;
  //! \brief Projects the secondary solution field onto the primary basis.
+  //! \param[in] integrand Object with problem-specific data and methods
  virtual Go::GeomObject* evalSolution(const Integrand& integrand) const;

  //! \brief Evaluates the secondary solution field at the given points.
@ -352,8 +353,8 @@ public:
  //! \a gpar[0].size() \a X \a gpar[1].size() \a X \a gpar[2].size().
  //! Otherwise, we assume that it contains the \a u, \a v and \a w parameters
  //! directly for each sampling point.
-  bool evalSolution(Matrix& sField, const Integrand& integrand,
-		    const RealArray* gpar, bool regular = true) const;
+  virtual bool evalSolution(Matrix& sField, const Integrand& integrand,
+			    const RealArray* gpar, bool regular = true) const;

  //! \brief Calculates parameter values for visualization nodal points.
  //! \param[out] prm Parameter values in given direction for all points
@ -405,6 +406,12 @@ protected:
  //! \param[in] basis Which basis to return size parameters for (mixed methods)
  virtual bool getSize(int& n1, int& n2, int& n3, int basis = 0) const;

+  //! \brief Generates element groups for multi-threading of interior integrals.
+  virtual void generateThreadGroups();
+  //! \brief Generates element groups for multi-threading of boundary integrals.
+  //! \param[in] lIndex Local index [1,6] of the boundary face
+  virtual void generateThreadGroups(char lIndex);
+
  //! \brief Establishes matrices with basis functions and 1st derivatives.
  static void extractBasis(const Go::BasisDerivs& spline,
 			   Vector& N, Matrix& dNdu);
@ -438,13 +445,10 @@ protected:
  const IndexVec& nodeInd; //!< IJK-triplets for the control points (nodes)
  IndexVec      myNodeInd; //!< The actual IJK-triplet container

-  //! Element groups for multithreaded volume assembly
+  //! Element groups for multi-threaded volume assembly
  utl::ThreadGroups threadGroupsVol;
-  //! Element groups for multithreaded face assembly
-  std::vector<utl::ThreadGroups> threadGroupsFace;
-
-  //! \brief Generate thread groups
-  virtual void generateThreadGroups();
+  //! Element groups for multi-threaded face assembly
+  std::map<char,utl::ThreadGroups> threadGroupsFace;
 };

 #endif
--- a/src/ASM/ASMs3DLag.C
+++ b/src/ASM/ASMs3DLag.C
@ -201,6 +201,38 @@ bool ASMs3DLag::getSize (int& n1, int& n2, int& n3, int) const
 }


+size_t ASMs3DLag::getNoBoundaryElms (char lIndex, char ldim) const
+{
+  if (!svol) return 0;
+
+  if (ldim < 1 && lIndex > 0)
+    return 1;
+
+  int nel[3]; // Number of elements in each direction
+  nel[0] = (nx-1)/(svol->order(0)-1);
+  nel[1] = (ny-1)/(svol->order(1)-1);
+  nel[2] = (nz-1)/(svol->order(2)-1);
+
+  if (ldim < 2 && lIndex > 0 && lIndex <= 12)
+    return nel[(lIndex-1)/4];
+
+  switch (lIndex)
+    {
+    case 1:
+    case 2:
+      return nel[1]*nel[2];
+    case 3:
+    case 4:
+      return nel[0]*nel[2];
+    case 5:
+    case 6:
+      return nel[0]*nel[1];
+    }
+
+  return 0;
+}
+
+
 bool ASMs3DLag::integrate (Integrand& integrand,
 			   GlobalIntegral& glInt,
 			   const TimeDomain& time)
@ -235,9 +267,6 @@ bool ASMs3DLag::integrate (Integrand& integrand,
  const int p2 = svol->order(1);
  const int p3 = svol->order(2);

-  if (threadGroupsVol.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements in the patch ==========================

@ -272,7 +301,7 @@ bool ASMs3DLag::integrate (Integrand& integrand,
          X *= 1.0/(double)Xnod.cols();
        }

-	// Initialize element quantities
+        // Initialize element quantities
        fe.iel = MLGE[iel-1];
        LocalIntegral* A = integrand.getLocalIntegral(fe.N.size(),fe.iel);
        if (!integrand.initElement(MNPC[iel-1],X,nRed*nRed*nRed,*A))
@ -367,8 +396,8 @@ bool ASMs3DLag::integrate (Integrand& integrand,
              }
            }

-	// Finalize the element quantities
- 	if (!integrand.finalizeElement(*A,time,firstIp+jp))
+        // Finalize the element quantities
+        if (!integrand.finalizeElement(*A,time,firstIp+jp))
        {
          ok = false;
          break;
@ -396,6 +425,15 @@ bool ASMs3DLag::integrate (Integrand& integrand, int lIndex,
 {
  if (!svol) return true; // silently ignore empty patches

+  std::map<char,utl::ThreadGroups>::const_iterator tit;
+  if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end())
+  {
+    std::cerr <<" *** ASMs3DLag::integrate: No thread groups for face "<< lIndex
+	      << std::endl;
+    return false;
+  }
+  const utl::ThreadGroups& threadGrp = tit->second;
+
  // Get Gaussian quadrature points and weights
  const double* xg = GaussQuadrature::getCoord(nGauss);
  const double* wg = GaussQuadrature::getWeight(nGauss);
@ -430,16 +468,13 @@ bool ASMs3DLag::integrate (Integrand& integrand, int lIndex,
  if (vpar.empty()) this->getGridParameters(vpar,1,1);
  if (wpar.empty()) this->getGridParameters(wpar,2,1);

-  if (threadGroupsFace.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements on the patch face =====================

-  bool ok=true;
-  for (size_t g=0;g<threadGroupsFace[lIndex-1].size() && ok;++g) {
+  bool ok = true;
+  for (size_t g = 0; g < threadGrp.size() && ok; ++g) {
 #pragma omp parallel for schedule(static)
-    for (size_t t=0;t<threadGroupsFace[lIndex-1][g].size();++t) {
+    for (size_t t = 0; t < threadGrp[g].size(); ++t) {
      FiniteElement fe(p1*p2*p3);
      fe.u = upar.front();
      fe.v = vpar.front();
@ -449,8 +484,8 @@ bool ASMs3DLag::integrate (Integrand& integrand, int lIndex,
      Vec4   X;
      Vec3   normal;
      double xi[3];
-      for (size_t l=0;l<threadGroupsFace[lIndex-1][g][t].size();++l) {
-        int iel = threadGroupsFace[lIndex-1][g][t][l];
+      for (size_t l = 0; l < threadGrp[g][t].size(); ++l) {
+        int iel = threadGrp[g][t][l];
        int i1  =  iel % nel1;
        int i2  = (iel / nel1) % nel2;
        int i3  =  iel / (nel1*nel2);
@ -542,8 +577,8 @@ bool ASMs3DLag::integrate (Integrand& integrand, int lIndex,
            }
 	  }

-	// Assembly of global system integral
-	if (!glInt.assemble(A->ref(),fe.iel))
+        // Assembly of global system integral
+        if (!glInt.assemble(A->ref(),fe.iel))
        {
          ok = false;
          break;
@ -840,63 +875,64 @@ bool ASMs3DLag::evalSolution (Matrix& sField, const Integrand& integrand,
 }


-void ASMs3DLag::generateThreadGroups()
+void ASMs3DLag::generateThreadGroups ()
 {
  const int p1 = svol->order(0);
  const int p2 = svol->order(1);
  const int p3 = svol->order(2);

-  // Evaluate the parametric values
-  RealArray gpar[3];
-  getGridParameters(gpar[0],0,p1-1);
-  getGridParameters(gpar[1],1,p2-1);
-  getGridParameters(gpar[2],2,p3-1);
-
-  const int nel1 = (gpar[0].size()-1)/(p1-1);
-  const int nel2 = (gpar[1].size()-1)/(p2-1);
-  const int nel3 = (gpar[2].size()-1)/(p3-1);
+  const int nel1 = (nx-1)/(p1-1);
+  const int nel2 = (ny-1)/(p2-1);
+  const int nel3 = (nz-1)/(p3-1);

  utl::calcThreadGroups(nel1,nel2,nel3,threadGroupsVol);
-
-  // now the faces
-  threadGroupsFace.resize(6);
-  for (int face=0;face<6;++face) {
-    const int faceDir = (face+2)/((face+1)%2 ? -2 : 2);
-    std::vector<int> map;
-    int iel=0;
-    for (int i3 = 0; i3 < nel3; i3++) {
-      for (int i2 = 0; i2 < nel2; i2++) {
-        for (int i1 = 0; i1 < nel1; i1++, iel++) {
-          // Skip elements that are not on current boundary face
-          bool skipMe = false;
-          switch (faceDir)
-          {
-            case -1: if (i1 > 0)      skipMe = true; break;
-            case  1: if (i1 < nel1-1) skipMe = true; break;
-            case -2: if (i2 > 0)      skipMe = true; break;
-            case  2: if (i2 < nel2-1) skipMe = true; break;
-            case -3: if (i3 > 0)      skipMe = true; break;
-            case  3: if (i3 < nel3-1) skipMe = true; break;
-          }
-          if (skipMe)
-            continue;
-
-          map.push_back(iel);
-        }
-      }
-    }
-    int d1, d2;
-    if (face < 2) {
-      d1 = nel2;
-      d2 = nel3;
-    } else if (face < 4) {
-      d1 = nel1;
-      d2 = nel3;
-    } else {
-      d1 = nel1;
-      d2 = nel2;
-    }
-    utl::calcThreadGroups(d1,d2,threadGroupsFace[face]);
-    utl::mapThreadGroups(threadGroupsFace[face],map);
-  }
+}
+
+
+void ASMs3DLag::generateThreadGroups (char lIndex)
+{
+  if (threadGroupsFace.find(lIndex) != threadGroupsFace.end()) return;
+
+  const int p1 = svol->order(0);
+  const int p2 = svol->order(1);
+  const int p3 = svol->order(2);
+  const int n1 = (nx-1)/(p1-1);
+  const int n2 = (ny-1)/(p2-1);
+  const int n3 = (nz-1)/(p3-1);
+
+  // Find elements that are on the boundary face 'lIndex'
+  IntVec map; map.reserve(this->getNoBoundaryElms(lIndex,2));
+  int d1, d2, iel = 0;
+  for (int i3 = 1; i3 <= n3; i3++)
+    for (int i2 = 1; i2 <= n2; i2++)
+      for (int i1 = 1; i1 <= n1; i1++, iel++)
+	switch (lIndex)
+          {
+	  case 1: if (i1 ==  1) map.push_back(iel); break;
+	  case 2: if (i1 == n1) map.push_back(iel); break;
+	  case 3: if (i2 ==  1) map.push_back(iel); break;
+	  case 4: if (i2 == n2) map.push_back(iel); break;
+	  case 5: if (i3 ==  1) map.push_back(iel); break;
+	  case 6: if (i3 == n3) map.push_back(iel); break;
+          }
+
+  switch (lIndex)
+    {
+    case 1:
+    case 2:
+      d1 = n2;
+      d2 = n3;
+      break;
+    case 3:
+    case 4:
+      d1 = n1;
+      d2 = n3;
+      break;
+    default:
+      d1 = n1;
+      d2 = n2;
+    }
+
+  utl::calcThreadGroups(d1,d2,threadGroupsFace[lIndex]);
+  utl::mapThreadGroups(threadGroupsFace[lIndex],map);
 }
--- a/src/ASM/ASMs3DLag.h
+++ b/src/ASM/ASMs3DLag.h
@ -47,6 +47,17 @@ public:
  //! This is used to reinitialize the patch after it has been refined.
  virtual void clear(bool retainGeometry = false);

+  //! \brief Returns a matrix with nodal coordinates for an element.
+  //! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes
+  //! in one element
+  //! \param[in] iel Element index
+  virtual bool getElementCoordinates(Matrix& X, int iel) const;
+
+  //! \brief Returns a matrix with all nodal coordinates within the patch.
+  //! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes
+  //! in the patch
+  virtual void getNodalCoordinates(Matrix& X) const;
+
  //! \brief Returns the global coordinates for the given node.
  //! \param[in] inod 1-based node index local to current patch
  virtual Vec3 getCoord(size_t inod) const;
@ -134,26 +145,21 @@ public:
  virtual bool evalSolution(Matrix& sField, const Integrand& integrand,
 			    const RealArray* gpar, bool regular = true) const;

-  //! \brief Returns a matrix with nodal coordinates for an element.
-  //! \param[in] iel Element index
-  //! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes
-  //! in one element
-  virtual bool getElementCoordinates(Matrix& X, int iel) const;
-
-  //! \brief Generate thread groups
-  void generateThreadGroups();
-public:
-  //! \brief Returns a matrix with all nodal coordinates within the patch.
-  //! \param[out] X 3\f$\times\f$n-matrix, where \a n is the number of nodes
-  //! in the patch
-  virtual void getNodalCoordinates(Matrix& X) const;
-
  //! \brief Returns the number of nodal points in each parameter direction.
  //! \param[out] n1 Number of nodes in first (u) direction
  //! \param[out] n2 Number of nodes in second (v) direction
  //! \param[out] n3 Number of nodes in third (w) direction
  virtual bool getSize(int& n1, int& n2, int& n3, int = 0) const;

+  //! \brief Generates element groups for multi-threading of interior integrals.
+  virtual void generateThreadGroups();
+  //! \brief Generates element groups for multi-threading of boundary integrals.
+  //! \param[in] lIndex Local index [1,6] of the boundary face
+  virtual void generateThreadGroups(char lIndex);
+
+  //! \brief Returns the number of elements on a boundary.
+  virtual size_t getNoBoundaryElms(char lIndex, char ldim) const;
+
 private:
  size_t nx; //!< Number of nodes in first parameter direction
  size_t ny; //!< Number of nodes in second parameter direction
--- a/src/ASM/ASMs3DSpec.C
+++ b/src/ASM/ASMs3DSpec.C
@ -101,9 +101,6 @@ bool ASMs3DSpec::integrate (Integrand& integrand,
  if (!Legendre::basisDerivatives(p2,D2)) return false;
  if (!Legendre::basisDerivatives(p3,D3)) return false;

-  if (threadGroupsVol.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements in the patch ==========================

@ -133,6 +130,7 @@ bool ASMs3DSpec::integrate (Integrand& integrand,
          break;
        }

+
        // --- Integration loop over all Gauss points in each direction --------

        int count = 1;
@ -185,6 +183,15 @@ bool ASMs3DSpec::integrate (Integrand& integrand, int lIndex,
 {
  if (!svol) return true; // silently ignore empty patches

+  std::map<char,utl::ThreadGroups>::const_iterator tit;
+  if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end())
+  {
+    std::cerr <<" *** ASMs3DSpec::integrate: No thread groups for face "<<lIndex
+	      << std::endl;
+    return false;
+  }
+  const utl::ThreadGroups& threadGrp = tit->second;
+
  // Find the parametric direction of the face normal {-3,-2,-1, 1, 2, 3}
  const int faceDir = (lIndex+1)/(lIndex%2 ? -2 : 2);

@ -212,33 +219,38 @@ bool ASMs3DSpec::integrate (Integrand& integrand, int lIndex,

  int nen = p[0]*p[1]*p[2];

+
  // === Assembly loop over all elements on the patch face =====================
-  bool ok=true;
-  for (size_t g=0;g<threadGroupsFace[lIndex-1].size() && ok;++g) {
+
+  bool ok = true;
+  for (size_t g = 0; g < threadGrp.size() && ok; ++g) {
 #pragma omp parallel for schedule(static)
-    for (size_t t=0;t<threadGroupsFace[lIndex-1][g].size();++t) {
+    for (size_t t = 0; t < threadGrp[g].size(); ++t) {
      FiniteElement fe(nen);
      Matrix dNdu(nen,3), Xnod, Jac;
      Vec4   X;
      Vec3   normal;
      int    xi[3];
-      for (size_t l=0;l<threadGroupsFace[lIndex-1][g][t].size();++l) {
-        int iel = threadGroupsFace[lIndex-1][g][t][l];
+      for (size_t l = 0; l < threadGrp[g][t].size(); ++l) {
+        int iel = threadGrp[g][t][l];

 	// Set up nodal point coordinates for current element
-	if (!this->getElementCoordinates(Xnod,++iel)) {
+        if (!this->getElementCoordinates(Xnod,++iel))
+        {
          ok = false;
          break;
        }

 	// Initialize element quantities
-	fe.iel = MLGE[iel-1];
+        fe.iel = MLGE[iel-1];
        LocalIntegral* A = integrand.getLocalIntegral(nen,fe.iel,true);
-        if (!integrand.initElementBou(MNPC[iel-1],*A)) {
+        if (!integrand.initElementBou(MNPC[iel-1],*A))
+        {
          ok = false;
          break;
        }

+
 	// --- Integration loop over all Gauss points in each direction --------

 	for (int j = 0; j < p[t2-1]; j++)
@ -265,14 +277,16 @@ bool ASMs3DSpec::integrate (Integrand& integrand, int lIndex,

 	    // Evaluate the integrand and accumulate element contributions
 	    fe.detJxW *=  wg[t1-1][i]*wg[t2-1][j];
-            if (!integrand.evalBou(*A,fe,time,X,normal)) {
+            if (!integrand.evalBou(*A,fe,time,X,normal))
+            {
              ok = false;
              break;
            }
 	  }

 	// Assembly of global system integral
-	if (!glInt.assemble(A->ref(),fe.iel)) {
+        if (!glInt.assemble(A->ref(),fe.iel))
+        {
          ok = false;
          break;
        }
@ -379,6 +393,7 @@ bool ASMs3DSpec::integrateEdge (Integrand& integrand, int lEdge,
        LocalIntegral* A = integrand.getLocalIntegral(nen,fe.iel,true);
        if (!integrand.initElementBou(MNPC[iel-1],*A)) return false;

+
 	// --- Integration loop over all Gauss points along the edge -----------

 	for (int i = 0; i < pe; i++)
--- a/src/ASM/ASMs3Dmx.C
+++ b/src/ASM/ASMs3Dmx.C
@ -538,9 +538,6 @@ bool ASMs3Dmx::integrate (Integrand& integrand,
  const int nel2 = n2 - p2 + 1;
  const int nel3 = n3 - p3 + 1;

-  if (threadGroupsVol.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements in the patch ==========================

@ -664,6 +661,15 @@ bool ASMs3Dmx::integrate (Integrand& integrand, int lIndex,

  PROFILE2("ASMs3Dmx::integrate(B)");

+  std::map<char,utl::ThreadGroups>::const_iterator tit;
+  if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end())
+  {
+    std::cerr <<" *** ASMs3D::integrate: No thread groups for face "<< lIndex
+	      << std::endl;
+    return false;
+  }
+  const utl::ThreadGroups& threadGrp = tit->second;
+
  // Get Gaussian quadrature points and weights
  const double* xg = GaussQuadrature::getCoord(nGauss);
  const double* wg = GaussQuadrature::getWeight(nGauss);
@ -706,16 +712,13 @@ bool ASMs3Dmx::integrate (Integrand& integrand, int lIndex,
  const int nel1 = n1 - p1 + 1;
  const int nel2 = n2 - p2 + 1;

-  if (threadGroupsFace.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements on the patch face =====================

-  bool ok=true;
-  for (size_t g=0;g<threadGroupsFace[lIndex-1].size() && ok;++g) {
+  bool ok = true;
+  for (size_t g = 0; g < threadGrp.size() && ok; ++g) {
 #pragma omp parallel for schedule(static)
-    for (size_t t=0;t<threadGroupsFace[lIndex-1][g].size();++t) {
+    for (size_t t = 0; t < threadGrp[g].size(); ++t) {
      MxFiniteElement fe(basis1->order(0)*basis1->order(1)*basis1->order(2),
                         basis2->order(0)*basis2->order(1)*basis2->order(2));
      fe.xi = fe.eta = fe.zeta = faceDir < 0 ? -1.0 : 1.0;
@ -726,8 +729,8 @@ bool ASMs3Dmx::integrate (Integrand& integrand, int lIndex,
      Matrix dN1du, dN2du, Xnod, Jac;
      Vec4   X;
      Vec3   normal;
-      for (size_t l=0;l<threadGroupsFace[lIndex-1][g][t].size();++l) {
-        int iel = threadGroupsFace[lIndex-1][g][t][l];
+      for (size_t l = 0; l < threadGrp[g][t].size(); ++l) {
+        int iel = threadGrp[g][t][l];
        fe.iel = MLGE[iel];
        if (fe.iel < 1) continue; // zero-volume element

--- a/src/ASM/ASMs3DmxLag.C
+++ b/src/ASM/ASMs3DmxLag.C
@ -256,9 +256,6 @@ bool ASMs3DmxLag::integrate (Integrand& integrand,
  const int q2 = p2 - 1;
  const int q3 = p3 - 1;

-  if (threadGroupsVol.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements in the patch ==========================

@ -364,6 +361,15 @@ bool ASMs3DmxLag::integrate (Integrand& integrand, int lIndex,
 {
  if (!svol) return true; // silently ignore empty patches

+  std::map<char,utl::ThreadGroups>::const_iterator tit;
+  if ((tit = threadGroupsFace.find(lIndex)) == threadGroupsFace.end())
+  {
+    std::cerr <<" *** ASMs3DLag::integrate: No thread groups for face "<< lIndex
+	      << std::endl;
+    return false;
+  }
+  const utl::ThreadGroups& threadGrp = tit->second;
+
  // Get Gaussian quadrature points and weights
  const double* xg = GaussQuadrature::getCoord(nGauss);
  const double* wg = GaussQuadrature::getWeight(nGauss);
@ -389,23 +395,20 @@ bool ASMs3DmxLag::integrate (Integrand& integrand, int lIndex,
  const int nel1 = (nx2-1)/(p1-1);
  const int nel2 = (ny2-1)/(p2-1);

-  if (threadGroupsFace.empty())
-    generateThreadGroups();
-

  // === Assembly loop over all elements on the patch face =====================

-  bool ok=true;
-  for (size_t g=0;g<threadGroupsFace[lIndex-1].size() && ok;++g) {
+  bool ok = true;
+  for (size_t g = 0; g < threadGrp.size() && ok; ++g) {
 #pragma omp parallel for schedule(static)
-    for (size_t t=0;t<threadGroupsFace[lIndex-1][g].size();++t) {
+    for (size_t t = 0; t < threadGrp[g].size(); ++t) {
      MxFiniteElement fe(p1*p2*p3,q1*q2*q3);
      Matrix dN1du, dN2du, Xnod, Jac;
      Vec4   X;
      Vec3   normal;
      double xi[3];
-      for (size_t l=0;l<threadGroupsFace[lIndex-1][g][t].size();++l) {
-        int iel = threadGroupsFace[lIndex-1][g][t][l];
+      for (size_t l = 0; l < threadGrp[g][t].size(); ++l) {
+        int iel = threadGrp[g][t][l];
        int i1  =  iel % nel1;
        int i2  = (iel / nel1) % nel2;
        int i3  =  iel / (nel1*nel2);
--- a/src/SIM/SIMbase.C
+++ b/src/SIM/SIMbase.C
@ -667,18 +667,19 @@ bool SIMbase::preprocess (const std::vector<int>& ignored, bool fixDup)
  std::cout <<"\nResolving Dirichlet boundary conditions"<< std::endl;

  bool ok = true;
-  for (PropertyVec::const_iterator p = myProps.begin(); p != myProps.end(); p++)
-    switch (p->pcode) {
+  PropertyVec::const_iterator q;
+  for (q = myProps.begin(); q != myProps.end(); q++)
+    switch (q->pcode) {

    case Property::DIRICHLET:
-      if (this->addConstraint(p->patch,p->lindx,p->ldim,p->pindx))
+      if (this->addConstraint(q->patch,q->lindx,q->ldim,q->pindx))
 	std::cout << std::endl;
      else
 	ok = false;
      break;

    case Property::DIRICHLET_INHOM:
-      if (this->addConstraint(p->patch,p->lindx,p->ldim,p->pindx%1000,p->pindx))
+      if (this->addConstraint(q->patch,q->lindx,q->ldim,q->pindx%1000,q->pindx))
 	std::cout << std::endl;
      else
 	ok = false;
@ -700,6 +701,15 @@ bool SIMbase::preprocess (const std::vector<int>& ignored, bool fixDup)
  // Resolve possibly chaining of the MPC equations
  if (!allMPCs.empty()) ASMbase::resolveMPCchains(allMPCs);

+  // Generate element groups for multi-threading
+  for (mit = myModel.begin(); mit != myModel.end(); mit++)
+    (*mit)->generateThreadGroups();
+  for (q = myProps.begin(); q != myProps.end(); q++)
+    if (q->pcode == Property::NEUMANN)
+      if (q->patch > 0 && q->patch <= myModel.size())
+	if (q->ldim+1 == myModel[q->patch-1]->getNoParamDim())
+	  myModel[q->patch-1]->generateThreadGroups(q->lindx);
+
  // Preprocess the result points
  for (ResPointVec::iterator p = myPoints.begin(); p != myPoints.end();)
  {