Added: Flag for forcing the thread strip direction.

Added: Private method for debug print of group definitions.
Changed: Put more code inside the ifdef USE_OPENMP.
This commit is contained in:
Arne Morten Kvarving 2018-05-25 10:28:39 +02:00 committed by Knut Morten Okstad
parent 598c09c4dd
commit c8c0e6297c
3 changed files with 169 additions and 132 deletions

View File

@ -62,6 +62,41 @@ TEST(TestThreadGroups, Groups2D)
CHECK_INTMATRICES_EQUAL(groups[0], "src/Utility/Test/refdata/ThreadGroups_2D_1.ref"); CHECK_INTMATRICES_EQUAL(groups[0], "src/Utility/Test/refdata/ThreadGroups_2D_1.ref");
#endif #endif
std::vector<bool> b14(4, true);
std::vector<bool> b24(4, true);
for (int i = 0; i < 2; ++i) {
ThreadGroups group((ThreadGroups::StripDirection)i);
group.calcGroups(b14, b24, 1, 1);
#ifdef USE_OPENMP
const int ref1[4][4] = {{0,4,8,12}, {2,6,10,14},
{0,1,2, 3}, {8,9,10,11}};
const int ref2[4][4] = {{1,5,9,13}, { 3, 7,11,15},
{4,5,6, 7}, {12,13,14,15}};
ASSERT_EQ(group.size(), 2U);
ASSERT_EQ(group[0].size(), 2U);
ASSERT_EQ(group[1].size(), 2U);
ASSERT_EQ(group[0][0].size(), 4U);
ASSERT_EQ(group[0][1].size(), 4U);
ASSERT_EQ(group[1][0].size(), 4U);
ASSERT_EQ(group[1][1].size(), 4U);
for (size_t j = 0; j < 4; ++j) {
ASSERT_EQ(group[0][0][j], ref1[i*2][j]);
ASSERT_EQ(group[0][1][j], ref1[i*2+1][j]);
ASSERT_EQ(group[1][0][j], ref2[i*2][j]);
ASSERT_EQ(group[1][1][j], ref2[i*2+1][j]);
}
#else
ASSERT_EQ(group.size(), 1U);
ASSERT_EQ(group[0].size(), 1U);
ASSERT_EQ(group[0][0].size(), 16U);
for (int j = 0; j < 16; ++j)
ASSERT_EQ(group[0][0][j], j);
#endif
}
#ifdef USE_OPENMP #ifdef USE_OPENMP
omp_set_num_threads(3); omp_set_num_threads(3);
#endif #endif

View File

@ -13,9 +13,7 @@
#include "ThreadGroups.h" #include "ThreadGroups.h"
#include <numeric> #include <numeric>
#if SP_DEBUG > 1
#include <iostream> #include <iostream>
#endif
#ifdef USE_OPENMP #ifdef USE_OPENMP
#include <omp.h> #include <omp.h>
#endif #endif
@ -33,33 +31,32 @@ void ThreadGroups::oneGroup (size_t nel)
void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2, void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
int p1, int p2) int p1, int p2)
{ {
#ifndef USE_OPENMP
this->oneGroup(el1.size()*el2.size());
#else
// Count the non-zero element in each direction, the zero-span elements // Count the non-zero element in each direction, the zero-span elements
// should not affect the partitioning as they don't involve any work // should not affect the partitioning as they don't involve any work
size_t i, nel1 = 0, nel2 = 0; size_t nel1 = 0, nel2 = 0;
for (i = 0; i < el1.size(); i++) for (bool e : el1) if (e) nel1++;
if (el1[i]) nel1++; for (bool e : el2) if (e) nel2++;
for (i = 0; i < el2.size(); i++)
if (el2[i]) nel2++;
int threads=1; int threads = omp_get_max_threads();
int stripsize=0;
int remainder=0;
int dir=0, mul=1;
#ifdef USE_OPENMP
threads = omp_get_max_threads();
int parts = threads > 1 ? 2*threads : 1; int parts = threads > 1 ? 2*threads : 1;
dir = getStripDirection(nel1,nel2,parts); if (stripDir == ANY)
mul = dir == 0 ? 1 : el1.size(); stripDir = getStripDirection(nel1,nel2,parts);
int els = dir == 0 ? nel1 : nel2; int mul = stripDir == U ? 1 : el1.size();
int els = stripDir == U ? nel1 : nel2;
// The minimum strip size (with) depends on the polynomial degree // The minimum strip size (width) depends on the polynomial degree
// due to the overlapping support of the splines basis functions // due to the overlapping support of the splines basis functions
int minsize = dir == 0 ? p1 : p2; int stripsize = 0;
int minsize = stripDir == U ? p1 : p2;
while (threads > 1 && (stripsize = els/parts) < minsize) { while (threads > 1 && (stripsize = els/parts) < minsize) {
threads --; threads --;
parts -= 2; parts -= 2;
} }
int remainder = 0;
if (threads > 1) if (threads > 1)
remainder = els - stripsize*parts; remainder = els - stripsize*parts;
else else
@ -71,7 +68,6 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
<< "\nstripsize " << stripsize << "\nstripsize " << stripsize
<< "\n# of strips " << els/stripsize << "\n# of strips " << els/stripsize
<< "\nremainder " << remainder << std::endl; << "\nremainder " << remainder << std::endl;
#endif
#endif #endif
nel1 = el1.size(); nel1 = el1.size();
@ -82,7 +78,7 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
{ {
int i, j, t, zspan, offs = 0; int i, j, t, zspan, offs = 0;
IntVec stripsizes[2], startelms[2]; IntVec stripsizes[2], startelms[2];
const BoolVec& elz = dir == 0 ? el1 : el2; const BoolVec& elz = stripDir == U ? el1 : el2;
stripsizes[0].resize(threads,stripsize); stripsizes[0].resize(threads,stripsize);
stripsizes[1].resize(threads,stripsize); stripsizes[1].resize(threads,stripsize);
@ -112,45 +108,41 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
for (i = 0; i < 2; ++i) { // loop over groups for (i = 0; i < 2; ++i) { // loop over groups
tg[i].resize(threads); tg[i].resize(threads);
for (int t = 0; t < threads; ++t) { // loop over threads for (int t = 0; t < threads; ++t) { // loop over threads
int maxx = dir == 0 ? stripsizes[i][t] : nel1; int maxx = stripDir == U ? stripsizes[i][t] : nel1;
int maxy = dir == 1 ? stripsizes[i][t] : nel2; int maxy = stripDir == V ? stripsizes[i][t] : nel2;
tg[i][t].reserve(maxx*maxy); tg[i][t].reserve(maxx*maxy);
for (int i2 = 0; i2 < maxy; ++i2) for (int i2 = 0; i2 < maxy; ++i2)
for (int i1 = 0; i1 < maxx; ++i1) for (int i1 = 0; i1 < maxx; ++i1)
tg[i][t].push_back(startelms[i][t]+i1+i2*nel1); tg[i][t].push_back(startelms[i][t]+i1+i2*nel1);
} }
#if defined(USE_OPENMP) && SP_DEBUG > 1 #if SP_DEBUG > 1
std::cout << "group " << i << std::endl; printGroup(tg[i],i);
for (size_t j = 0; j < tg[i].size(); ++j) {
std::cout << "\t thread " << j << ": ";
for (size_t k = 0; k < tg[i][j].size(); ++k)
std::cout << tg[i][j][k] << " ";
std::cout << std::endl;
}
#endif #endif
} }
} }
#endif
} }
void ThreadGroups::calcGroups (int nel1, int nel2, int minsize) void ThreadGroups::calcGroups (int nel1, int nel2, int minsize)
{ {
int threads=1; #ifndef USE_OPENMP
int stripsize=0; this->oneGroup(nel1*nel2);
int remainder=0; #else
int dir=0, mul=1; int threads = omp_get_max_threads();
#ifdef USE_OPENMP
threads = omp_get_max_threads();
int parts = threads > 1 ? 2*threads : 1; int parts = threads > 1 ? 2*threads : 1;
dir = getStripDirection(nel1,nel2,parts); if (stripDir == ANY)
mul = dir == 0 ? 1 : nel1; stripDir = getStripDirection(nel1,nel2,parts);
int els = dir == 0 ? nel1 : nel2; int mul = stripDir == U ? 1 : nel1;
int els = stripDir == U ? nel1 : nel2;
int stripsize = 0;
while (threads > 1 && (stripsize = els/parts) < minsize) { while (threads > 1 && (stripsize = els/parts) < minsize) {
threads --; threads --;
parts -= 2; parts -= 2;
} }
int remainder = 0;
if (threads > 1) if (threads > 1)
remainder = els - stripsize*parts; remainder = els - stripsize*parts;
else else
@ -162,7 +154,6 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int minsize)
<< "\nstripsize " << stripsize << "\nstripsize " << stripsize
<< "\n# of strips " << els/stripsize << "\n# of strips " << els/stripsize
<< "\nremainder " << remainder << std::endl; << "\nremainder " << remainder << std::endl;
#endif
#endif #endif
if (threads == 1) if (threads == 1)
@ -187,77 +178,51 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int minsize)
for (i = 0; i < 2; ++i) { // loop over groups for (i = 0; i < 2; ++i) { // loop over groups
tg[i].resize(threads); tg[i].resize(threads);
for (int t = 0; t < threads; ++t) { // loop over threads for (int t = 0; t < threads; ++t) { // loop over threads
int maxx = dir == 0 ? stripsizes[i][t] : nel1; int maxx = stripDir == U ? stripsizes[i][t] : nel1;
int maxy = dir == 1 ? stripsizes[i][t] : nel2; int maxy = stripDir == V ? stripsizes[i][t] : nel2;
for (int i2 = 0; i2 < maxy; ++i2) for (int i2 = 0; i2 < maxy; ++i2)
for (int i1 = 0; i1 < maxx; ++i1) for (int i1 = 0; i1 < maxx; ++i1)
tg[i][t].push_back(startelms[i][t]+i1+i2*nel1); tg[i][t].push_back(startelms[i][t]+i1+i2*nel1);
} }
#if defined(USE_OPENMP) && SP_DEBUG > 1 #if defined(USE_OPENMP) && SP_DEBUG > 1
std::cout << "group " << i << std::endl; printGroup(tg[i],i);
for (size_t j = 0; j < tg[i].size(); ++j) {
std::cout << "\t thread " << j << ": ";
for (size_t k = 0; k < tg[i][j].size(); ++k)
std::cout << tg[i][j][k] << " ";
std::cout << std::endl;
}
#endif #endif
} }
} }
} #endif
int ThreadGroups::getStripDirection (int nel1, int nel2, int parts)
{
int s1 = nel1 / parts;
int s2 = nel2 / parts;
int r1 = nel1 - s1*parts;
int r2 = nel2 - s2*parts;
if (r1*nel2 < r2*nel1)
return 0; // strips in u-direction
else if (r1*nel2 > r2*nel1)
return 1; // strips in v-direction
if (nel1 > nel2)
return 0; // strips in u-direction
else
return 1; // strips in v-direction
} }
void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2, void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
const BoolVec& el3, int p1, int p2, int p3) const BoolVec& el3, int p1, int p2, int p3)
{ {
#ifndef USE_OPENMP
this->oneGroup(el1.size()*el2.size()*el3.size());
#else
// Count the non-zero element in each direction, the zero-span elements // Count the non-zero element in each direction, the zero-span elements
// should not affect the partitioning as they don't involve any work // should not affect the partitioning as they don't involve any work
size_t i, nel1 = 0, nel2 = 0, nel3 = 0; size_t nel1 = 0, nel2 = 0, nel3 = 0;
for (i = 0; i < el1.size(); i++) for (bool e : el1) if (e) nel1++;
if (el1[i]) nel1++; for (bool e : el2) if (e) nel2++;
for (i = 0; i < el2.size(); i++) for (bool e : el3) if (e) nel3++;
if (el2[i]) nel2++;
for (i = 0; i < el3.size(); i++)
if (el3[i]) nel3++;
int threads=1; int threads = omp_get_max_threads();
int stripsize=0;
int remainder=0;
int dir=0, mul=1;
#ifdef USE_OPENMP
threads = omp_get_max_threads();
int parts = threads > 1 ? 2*threads : 1; int parts = threads > 1 ? 2*threads : 1;
dir = getStripDirection(nel1,nel2,nel3,parts); if (stripDir == ANY)
mul = dir == 0 ? 1 : el1.size()*(dir == 1 ? 1 : el2.size()); stripDir = getStripDirection(nel1,nel2,nel3,parts);
int els = dir == 0 ? nel1 : (dir == 1 ? nel2 : nel3); int mul = stripDir == U ? 1 : el1.size()*(stripDir == V ? 1 : el2.size());
int els = stripDir == U ? nel1 : (stripDir == V ? nel2 : nel3);
// The minimum strip size (with) depends on the polynomial degree // The minimum strip size (width) depends on the polynomial degree
// due to the overlapping support of the splines basis functions // due to the overlapping support of the splines basis functions
int minsize = dir == 0 ? p1 : (dir == 1 ? p2 : p3); int stripsize = 0;
int minsize = stripDir == U ? p1 : (stripDir == V ? p2 : p3);
while (threads > 1 && (stripsize = els/parts) < minsize) { while (threads > 1 && (stripsize = els/parts) < minsize) {
threads --; threads --;
parts -= 2; parts -= 2;
} }
int remainder = 0;
if (threads > 1) if (threads > 1)
remainder = els - stripsize*parts; remainder = els - stripsize*parts;
else else
@ -269,7 +234,6 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
<< "\nstripsize " << stripsize << "\nstripsize " << stripsize
<< "\n# of strips " << els/stripsize << "\n# of strips " << els/stripsize
<< "\nremainder " << remainder << std::endl; << "\nremainder " << remainder << std::endl;
#endif
#endif #endif
nel1 = el1.size(); nel1 = el1.size();
@ -281,7 +245,7 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
{ {
int i, j, t, zspan, offs = 0; int i, j, t, zspan, offs = 0;
IntVec stripsizes[2], startelms[2]; IntVec stripsizes[2], startelms[2];
const BoolVec& elz = dir == 0 ? el1 : (dir == 1 ? el2 : el3); const BoolVec& elz = stripDir == U ? el1 : (stripDir == V ? el2 : el3);
stripsizes[0].resize(threads,stripsize); stripsizes[0].resize(threads,stripsize);
stripsizes[1].resize(threads,stripsize); stripsizes[1].resize(threads,stripsize);
@ -311,9 +275,9 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
for (i = 0; i < 2; ++i) { // loop over groups for (i = 0; i < 2; ++i) { // loop over groups
tg[i].resize(threads); tg[i].resize(threads);
for (int t = 0; t < threads; ++t) { // loop over threads for (int t = 0; t < threads; ++t) { // loop over threads
int maxx = dir == 0 ? stripsizes[i][t] : nel1; int maxx = stripDir == U ? stripsizes[i][t] : nel1;
int maxy = dir == 1 ? stripsizes[i][t] : nel2; int maxy = stripDir == V ? stripsizes[i][t] : nel2;
int maxz = dir == 2 ? stripsizes[i][t] : nel3; int maxz = stripDir == W ? stripsizes[i][t] : nel3;
tg[i][t].reserve(maxx*maxy*maxz); tg[i][t].reserve(maxx*maxy*maxz);
for (int i3 = 0; i3 < maxz; ++i3) for (int i3 = 0; i3 < maxz; ++i3)
for (int i2 = 0; i2 < maxy; ++i2) for (int i2 = 0; i2 < maxy; ++i2)
@ -321,38 +285,34 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
tg[i][t].push_back(startelms[i][t]+i1+nel1*(i2+nel2*i3)); tg[i][t].push_back(startelms[i][t]+i1+nel1*(i2+nel2*i3));
} }
#if defined(USE_OPENMP) && SP_DEBUG > 1 #if SP_DEBUG > 1
std::cout << "group " << i << std::endl; printGroup(tg[i],i);
for (size_t j = 0; j < tg[i].size(); ++j) {
std::cout << "\t thread " << j << " (" << tg[i][j].size() << "): ";
for (size_t k = 0; k < tg[i][j].size(); ++k)
std::cout << tg[i][j][k] << " ";
std::cout << std::endl;
}
#endif #endif
} }
} }
#endif
} }
void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize) void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
{ {
int threads=1; #ifndef USE_OPENMP
int stripsize=0; this->oneGroup(nel1*nel2*nel3);
int remainder=0; #else
int i, dir=0, mul=1; int threads = omp_get_max_threads();
#ifdef USE_OPENMP
threads = omp_get_max_threads();
int parts = threads > 1 ? 2*threads : 1; int parts = threads > 1 ? 2*threads : 1;
dir = getStripDirection(nel1,nel2,nel3,parts); if (stripDir == ANY)
mul = dir == 0 ? 1 : nel1*(dir == 1 ? 1 : nel2); stripDir = getStripDirection(nel1,nel2,nel3,parts);
int els = dir == 0 ? nel1 : (dir == 1 ? nel2 : nel3); int mul = stripDir == U ? 1 : nel1*(stripDir == V ? 1 : nel2);
int els = stripDir == U ? nel1 : (stripDir == V ? nel2 : nel3);
int stripsize = 0;
while (threads > 1 && (stripsize = els/parts) < minsize) { while (threads > 1 && (stripsize = els/parts) < minsize) {
threads --; threads --;
parts -= 2; parts -= 2;
} }
int remainder = 0;
if (threads > 1) if (threads > 1)
remainder = els - stripsize*parts; remainder = els - stripsize*parts;
else else
@ -364,13 +324,13 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
<< "\nstripsize " << stripsize << "\nstripsize " << stripsize
<< "\n# of strips " << els/stripsize << "\n# of strips " << els/stripsize
<< "\nremainder " << remainder << std::endl; << "\nremainder " << remainder << std::endl;
#endif
#endif #endif
if (threads == 1) if (threads == 1)
this->oneGroup(nel1*nel2*nel3); this->oneGroup(nel1*nel2*nel3);
else else
{ {
int offs, i;
IntVec stripsizes[2]; IntVec stripsizes[2];
stripsizes[0].resize(threads,stripsize); stripsizes[0].resize(threads,stripsize);
stripsizes[1].resize(threads,stripsize); stripsizes[1].resize(threads,stripsize);
@ -378,7 +338,7 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
stripsizes[i%2][threads-(i+1)/2]++; stripsizes[i%2][threads-(i+1)/2]++;
IntVec startelms[2]; IntVec startelms[2];
for (int offs = i = 0; i < threads; ++i) { for (offs = i = 0; i < threads; ++i) {
startelms[0].push_back(offs*mul); startelms[0].push_back(offs*mul);
offs += stripsizes[0][i]; offs += stripsizes[0][i];
startelms[1].push_back(offs*mul); startelms[1].push_back(offs*mul);
@ -388,9 +348,9 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
for (i = 0; i < 2; ++i) { // loop over groups for (i = 0; i < 2; ++i) { // loop over groups
tg[i].resize(threads); tg[i].resize(threads);
for (int t = 0; t < threads; ++t) { // loop over threads for (int t = 0; t < threads; ++t) { // loop over threads
int maxx = dir == 0 ? stripsizes[i][t] : nel1; int maxx = stripDir == U ? stripsizes[i][t] : nel1;
int maxy = dir == 1 ? stripsizes[i][t] : nel2; int maxy = stripDir == V ? stripsizes[i][t] : nel2;
int maxz = dir == 2 ? stripsizes[i][t] : nel3; int maxz = stripDir == W ? stripsizes[i][t] : nel3;
for (int i3 = 0; i3 < maxz; ++i3) for (int i3 = 0; i3 < maxz; ++i3)
for (int i2 = 0; i2 < maxy; ++i2) for (int i2 = 0; i2 < maxy; ++i2)
for (int i1 = 0; i1 < maxx; ++i1) for (int i1 = 0; i1 < maxx; ++i1)
@ -398,20 +358,36 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
} }
#if defined(USE_OPENMP) && SP_DEBUG > 1 #if defined(USE_OPENMP) && SP_DEBUG > 1
std::cout << "group " << i << std::endl; printGroup(tg[i],i);
for (size_t j = 0; j < tg[i].size(); ++j) {
std::cout << "\t thread " << j << " (" << tg[i][j].size() << "): ";
for (size_t k = 0; k < tg[i][j].size(); ++k)
std::cout << tg[i][j][k] << " ";
std::cout << std::endl;
}
#endif #endif
} }
} }
#endif
} }
int ThreadGroups::getStripDirection (int nel1, int nel2, int nel3, int parts) ThreadGroups::StripDirection ThreadGroups::getStripDirection (int nel1,
int nel2,
int parts)
{
int s1 = nel1 / parts;
int s2 = nel2 / parts;
int r1 = nel1 - s1*parts;
int r2 = nel2 - s2*parts;
if (r1*nel2 < r2*nel1)
return U;
else if (r1*nel2 > r2*nel1)
return V;
return nel1 > nel2 ? U : V;
}
ThreadGroups::StripDirection ThreadGroups::getStripDirection (int nel1,
int nel2,
int nel3,
int parts)
{ {
int s1 = nel1 / parts; int s1 = nel1 / parts;
int s2 = nel2 / parts; int s2 = nel2 / parts;
@ -421,27 +397,27 @@ int ThreadGroups::getStripDirection (int nel1, int nel2, int nel3, int parts)
int r3 = nel3 - s3*parts; int r3 = nel3 - s3*parts;
if (r1*nel2*nel3 < nel1*r2*nel3 && r1*nel2*nel3 < nel1*nel2*r3) if (r1*nel2*nel3 < nel1*r2*nel3 && r1*nel2*nel3 < nel1*nel2*r3)
return 0; // strips along x axis return U;
else if (nel1*r2*nel3 < r1*nel2*nel3 && nel1*r2*nel3 < nel1*nel2*r3) else if (nel1*r2*nel3 < r1*nel2*nel3 && nel1*r2*nel3 < nel1*nel2*r3)
return 1; // strips along y axis return V;
else if (nel1*nel2*r3 < r1*nel2*nel3 && nel1*nel2*r3 < nel1*r2*nel3) else if (nel1*nel2*r3 < r1*nel2*nel3 && nel1*nel2*r3 < nel1*r2*nel3)
return 2; // strips along z axis return W;
// The number of left-over elements is not smallest in one direction only // The number of left-over elements is not smallest in one direction only
if (r1*nel2*nel3 > nel1*r2*nel3) if (r1*nel2*nel3 > nel1*r2*nel3)
return nel2 > nel3 ? 1 : 2; return nel2 > nel3 ? V : W;
else if (nel1*r2*nel3 > nel1*nel2*r3) else if (nel1*r2*nel3 > nel1*nel2*r3)
return nel1 > nel3 ? 0 : 2; return nel1 > nel3 ? U : W;
else if (nel1*nel2*r3 > r1*nel2*nel3) else if (nel1*nel2*r3 > r1*nel2*nel3)
return nel1 > nel2 ? 0 : 1; return nel1 > nel2 ? U : V;
// The number of left-over elements is the same in all three directions // The number of left-over elements is the same in all three directions
if (nel1 >= nel2 && nel1 >= nel3) if (nel1 >= nel2 && nel1 >= nel3)
return 0; return U;
else if (nel2 >= nel1 && nel2 >= nel3) else if (nel2 >= nel1 && nel2 >= nel3)
return 1; return V;
else else
return 2; return W;
} }
@ -452,3 +428,15 @@ void ThreadGroups::applyMap (const IntVec& map)
for (size_t j = 0; j < tg[l][k].size(); ++j) for (size_t j = 0; j < tg[l][k].size(); ++j)
tg[l][k][j] = map[tg[l][k][j]]; tg[l][k][j] = map[tg[l][k][j]];
} }
void ThreadGroups::printGroup (const IntMat& group, int g)
{
std::cout <<"group "<< g;
for (size_t t = 0; t < group.size(); t++)
{
std::cout <<"\n\t thread "<< t <<" ("<< group[t].size() <<"):";
for (int e : group[t]) std::cout <<" "<< e;
}
std::cout << std::endl;
}

View File

@ -29,6 +29,12 @@ class ThreadGroups
typedef std::vector<IntVec> IntMat; //!< Element lists for all threads typedef std::vector<IntVec> IntMat; //!< Element lists for all threads
public: public:
//! Directions to consider for element stripes.
enum StripDirection { U, V, W, ANY };
//! \brief Default constructor.
ThreadGroups(StripDirection dir = ANY) : stripDir(dir) {}
//! \brief Calculates a 2D thread group partitioning based on strips. //! \brief Calculates a 2D thread group partitioning based on strips.
//! \param[in] el1 Flags non-zero knot spans in first parameter direction //! \param[in] el1 Flags non-zero knot spans in first parameter direction
//! \param[in] el2 Flags non-zero knot spans in second parameter direction //! \param[in] el2 Flags non-zero knot spans in second parameter direction
@ -74,9 +80,17 @@ public:
protected: protected:
//! \brief Calculates the parameter direction of the treading strips in 2D. //! \brief Calculates the parameter direction of the treading strips in 2D.
static int getStripDirection(int nel1, int nel2, int parts); static StripDirection getStripDirection(int nel1, int nel2,
int parts);
//! \brief Calculates the parameter direction of the treading strips in 3D. //! \brief Calculates the parameter direction of the treading strips in 3D.
static int getStripDirection(int nel1, int nel2, int nel3, int parts); static StripDirection getStripDirection(int nel1, int nel2, int nel3,
int parts);
//! \brief Prints out a threading group definition.
static void printGroup(const IntMat& group, int g);
public:
StripDirection stripDir; //!< Actual direction to split elements
private: private:
IntMat tg[2]; //!< Threading groups (always two, but the second may be empty) IntMat tg[2]; //!< Threading groups (always two, but the second may be empty)