Added: Flag for forced the thread strip direction.
Added: Private method for debug print of group defintions. Changed: Put more code inside the ifdef USE_OPENMP.
This commit is contained in:
parent
598c09c4dd
commit
c8c0e6297c
@ -62,6 +62,41 @@ TEST(TestThreadGroups, Groups2D)
|
||||
CHECK_INTMATRICES_EQUAL(groups[0], "src/Utility/Test/refdata/ThreadGroups_2D_1.ref");
|
||||
#endif
|
||||
|
||||
std::vector<bool> b14(4, true);
|
||||
std::vector<bool> b24(4, true);
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
ThreadGroups group((ThreadGroups::StripDirection)i);
|
||||
group.calcGroups(b14, b24, 1, 1);
|
||||
#ifdef USE_OPENMP
|
||||
const int ref1[4][4] = {{0,4,8,12}, {2,6,10,14},
|
||||
{0,1,2, 3}, {8,9,10,11}};
|
||||
|
||||
const int ref2[4][4] = {{1,5,9,13}, { 3, 7,11,15},
|
||||
{4,5,6, 7}, {12,13,14,15}};
|
||||
|
||||
ASSERT_EQ(group.size(), 2U);
|
||||
ASSERT_EQ(group[0].size(), 2U);
|
||||
ASSERT_EQ(group[1].size(), 2U);
|
||||
ASSERT_EQ(group[0][0].size(), 4U);
|
||||
ASSERT_EQ(group[0][1].size(), 4U);
|
||||
ASSERT_EQ(group[1][0].size(), 4U);
|
||||
ASSERT_EQ(group[1][1].size(), 4U);
|
||||
for (size_t j = 0; j < 4; ++j) {
|
||||
ASSERT_EQ(group[0][0][j], ref1[i*2][j]);
|
||||
ASSERT_EQ(group[0][1][j], ref1[i*2+1][j]);
|
||||
ASSERT_EQ(group[1][0][j], ref2[i*2][j]);
|
||||
ASSERT_EQ(group[1][1][j], ref2[i*2+1][j]);
|
||||
}
|
||||
#else
|
||||
ASSERT_EQ(group.size(), 1U);
|
||||
ASSERT_EQ(group[0].size(), 1U);
|
||||
ASSERT_EQ(group[0][0].size(), 16U);
|
||||
for (int j = 0; j < 16; ++j)
|
||||
ASSERT_EQ(group[0][0][j], j);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
omp_set_num_threads(3);
|
||||
#endif
|
||||
|
@ -13,9 +13,7 @@
|
||||
|
||||
#include "ThreadGroups.h"
|
||||
#include <numeric>
|
||||
#if SP_DEBUG > 1
|
||||
#include <iostream>
|
||||
#endif
|
||||
#ifdef USE_OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
@ -33,33 +31,32 @@ void ThreadGroups::oneGroup (size_t nel)
|
||||
void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
||||
int p1, int p2)
|
||||
{
|
||||
#ifndef USE_OPENMP
|
||||
this->oneGroup(el1.size()*el2.size());
|
||||
#else
|
||||
// Count the non-zero element in each direction, the zero-span elements
|
||||
// should not affect the partitioning as they don't involve any work
|
||||
size_t i, nel1 = 0, nel2 = 0;
|
||||
for (i = 0; i < el1.size(); i++)
|
||||
if (el1[i]) nel1++;
|
||||
for (i = 0; i < el2.size(); i++)
|
||||
if (el2[i]) nel2++;
|
||||
size_t nel1 = 0, nel2 = 0;
|
||||
for (bool e : el1) if (e) nel1++;
|
||||
for (bool e : el2) if (e) nel2++;
|
||||
|
||||
int threads=1;
|
||||
int stripsize=0;
|
||||
int remainder=0;
|
||||
int dir=0, mul=1;
|
||||
#ifdef USE_OPENMP
|
||||
threads = omp_get_max_threads();
|
||||
int threads = omp_get_max_threads();
|
||||
int parts = threads > 1 ? 2*threads : 1;
|
||||
dir = getStripDirection(nel1,nel2,parts);
|
||||
mul = dir == 0 ? 1 : el1.size();
|
||||
int els = dir == 0 ? nel1 : nel2;
|
||||
if (stripDir == ANY)
|
||||
stripDir = getStripDirection(nel1,nel2,parts);
|
||||
int mul = stripDir == U ? 1 : el1.size();
|
||||
int els = stripDir == U ? nel1 : nel2;
|
||||
|
||||
// The minimum strip size (with) depends on the polynomial degree
|
||||
// The minimum strip size (width) depends on the polynomial degree
|
||||
// due to the overlapping support of the splines basis functions
|
||||
int minsize = dir == 0 ? p1 : p2;
|
||||
int stripsize = 0;
|
||||
int minsize = stripDir == U ? p1 : p2;
|
||||
while (threads > 1 && (stripsize = els/parts) < minsize) {
|
||||
threads --;
|
||||
parts -= 2;
|
||||
}
|
||||
|
||||
int remainder = 0;
|
||||
if (threads > 1)
|
||||
remainder = els - stripsize*parts;
|
||||
else
|
||||
@ -71,7 +68,6 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
||||
<< "\nstripsize " << stripsize
|
||||
<< "\n# of strips " << els/stripsize
|
||||
<< "\nremainder " << remainder << std::endl;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
nel1 = el1.size();
|
||||
@ -82,7 +78,7 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
||||
{
|
||||
int i, j, t, zspan, offs = 0;
|
||||
IntVec stripsizes[2], startelms[2];
|
||||
const BoolVec& elz = dir == 0 ? el1 : el2;
|
||||
const BoolVec& elz = stripDir == U ? el1 : el2;
|
||||
|
||||
stripsizes[0].resize(threads,stripsize);
|
||||
stripsizes[1].resize(threads,stripsize);
|
||||
@ -112,45 +108,41 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
||||
for (i = 0; i < 2; ++i) { // loop over groups
|
||||
tg[i].resize(threads);
|
||||
for (int t = 0; t < threads; ++t) { // loop over threads
|
||||
int maxx = dir == 0 ? stripsizes[i][t] : nel1;
|
||||
int maxy = dir == 1 ? stripsizes[i][t] : nel2;
|
||||
int maxx = stripDir == U ? stripsizes[i][t] : nel1;
|
||||
int maxy = stripDir == V ? stripsizes[i][t] : nel2;
|
||||
tg[i][t].reserve(maxx*maxy);
|
||||
for (int i2 = 0; i2 < maxy; ++i2)
|
||||
for (int i1 = 0; i1 < maxx; ++i1)
|
||||
tg[i][t].push_back(startelms[i][t]+i1+i2*nel1);
|
||||
}
|
||||
#if defined(USE_OPENMP) && SP_DEBUG > 1
|
||||
std::cout << "group " << i << std::endl;
|
||||
for (size_t j = 0; j < tg[i].size(); ++j) {
|
||||
std::cout << "\t thread " << j << ": ";
|
||||
for (size_t k = 0; k < tg[i][j].size(); ++k)
|
||||
std::cout << tg[i][j][k] << " ";
|
||||
std::cout << std::endl;
|
||||
}
|
||||
#if SP_DEBUG > 1
|
||||
printGroup(tg[i],i);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void ThreadGroups::calcGroups (int nel1, int nel2, int minsize)
|
||||
{
|
||||
int threads=1;
|
||||
int stripsize=0;
|
||||
int remainder=0;
|
||||
int dir=0, mul=1;
|
||||
#ifdef USE_OPENMP
|
||||
threads = omp_get_max_threads();
|
||||
#ifndef USE_OPENMP
|
||||
this->oneGroup(nel1*nel2);
|
||||
#else
|
||||
int threads = omp_get_max_threads();
|
||||
int parts = threads > 1 ? 2*threads : 1;
|
||||
dir = getStripDirection(nel1,nel2,parts);
|
||||
mul = dir == 0 ? 1 : nel1;
|
||||
int els = dir == 0 ? nel1 : nel2;
|
||||
if (stripDir == ANY)
|
||||
stripDir = getStripDirection(nel1,nel2,parts);
|
||||
int mul = stripDir == U ? 1 : nel1;
|
||||
int els = stripDir == U ? nel1 : nel2;
|
||||
|
||||
int stripsize = 0;
|
||||
while (threads > 1 && (stripsize = els/parts) < minsize) {
|
||||
threads --;
|
||||
parts -= 2;
|
||||
}
|
||||
|
||||
int remainder = 0;
|
||||
if (threads > 1)
|
||||
remainder = els - stripsize*parts;
|
||||
else
|
||||
@ -162,7 +154,6 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int minsize)
|
||||
<< "\nstripsize " << stripsize
|
||||
<< "\n# of strips " << els/stripsize
|
||||
<< "\nremainder " << remainder << std::endl;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (threads == 1)
|
||||
@ -187,77 +178,51 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int minsize)
|
||||
for (i = 0; i < 2; ++i) { // loop over groups
|
||||
tg[i].resize(threads);
|
||||
for (int t = 0; t < threads; ++t) { // loop over threads
|
||||
int maxx = dir == 0 ? stripsizes[i][t] : nel1;
|
||||
int maxy = dir == 1 ? stripsizes[i][t] : nel2;
|
||||
int maxx = stripDir == U ? stripsizes[i][t] : nel1;
|
||||
int maxy = stripDir == V ? stripsizes[i][t] : nel2;
|
||||
for (int i2 = 0; i2 < maxy; ++i2)
|
||||
for (int i1 = 0; i1 < maxx; ++i1)
|
||||
tg[i][t].push_back(startelms[i][t]+i1+i2*nel1);
|
||||
}
|
||||
#if defined(USE_OPENMP) && SP_DEBUG > 1
|
||||
std::cout << "group " << i << std::endl;
|
||||
for (size_t j = 0; j < tg[i].size(); ++j) {
|
||||
std::cout << "\t thread " << j << ": ";
|
||||
for (size_t k = 0; k < tg[i][j].size(); ++k)
|
||||
std::cout << tg[i][j][k] << " ";
|
||||
std::cout << std::endl;
|
||||
}
|
||||
printGroup(tg[i],i);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int ThreadGroups::getStripDirection (int nel1, int nel2, int parts)
|
||||
{
|
||||
int s1 = nel1 / parts;
|
||||
int s2 = nel2 / parts;
|
||||
int r1 = nel1 - s1*parts;
|
||||
int r2 = nel2 - s2*parts;
|
||||
|
||||
if (r1*nel2 < r2*nel1)
|
||||
return 0; // strips in u-direction
|
||||
else if (r1*nel2 > r2*nel1)
|
||||
return 1; // strips in v-direction
|
||||
|
||||
if (nel1 > nel2)
|
||||
return 0; // strips in u-direction
|
||||
else
|
||||
return 1; // strips in v-direction
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
||||
const BoolVec& el3, int p1, int p2, int p3)
|
||||
{
|
||||
#ifndef USE_OPENMP
|
||||
this->oneGroup(el1.size()*el2.size()*el3.size());
|
||||
#else
|
||||
// Count the non-zero element in each direction, the zero-span elements
|
||||
// should not affect the partitioning as they don't involve any work
|
||||
size_t i, nel1 = 0, nel2 = 0, nel3 = 0;
|
||||
for (i = 0; i < el1.size(); i++)
|
||||
if (el1[i]) nel1++;
|
||||
for (i = 0; i < el2.size(); i++)
|
||||
if (el2[i]) nel2++;
|
||||
for (i = 0; i < el3.size(); i++)
|
||||
if (el3[i]) nel3++;
|
||||
size_t nel1 = 0, nel2 = 0, nel3 = 0;
|
||||
for (bool e : el1) if (e) nel1++;
|
||||
for (bool e : el2) if (e) nel2++;
|
||||
for (bool e : el3) if (e) nel3++;
|
||||
|
||||
int threads=1;
|
||||
int stripsize=0;
|
||||
int remainder=0;
|
||||
int dir=0, mul=1;
|
||||
#ifdef USE_OPENMP
|
||||
threads = omp_get_max_threads();
|
||||
int threads = omp_get_max_threads();
|
||||
int parts = threads > 1 ? 2*threads : 1;
|
||||
dir = getStripDirection(nel1,nel2,nel3,parts);
|
||||
mul = dir == 0 ? 1 : el1.size()*(dir == 1 ? 1 : el2.size());
|
||||
int els = dir == 0 ? nel1 : (dir == 1 ? nel2 : nel3);
|
||||
if (stripDir == ANY)
|
||||
stripDir = getStripDirection(nel1,nel2,nel3,parts);
|
||||
int mul = stripDir == U ? 1 : el1.size()*(stripDir == V ? 1 : el2.size());
|
||||
int els = stripDir == U ? nel1 : (stripDir == V ? nel2 : nel3);
|
||||
|
||||
// The minimum strip size (with) depends on the polynomial degree
|
||||
// The minimum strip size (width) depends on the polynomial degree
|
||||
// due to the overlapping support of the splines basis functions
|
||||
int minsize = dir == 0 ? p1 : (dir == 1 ? p2 : p3);
|
||||
int stripsize = 0;
|
||||
int minsize = stripDir == U ? p1 : (stripDir == V ? p2 : p3);
|
||||
while (threads > 1 && (stripsize = els/parts) < minsize) {
|
||||
threads --;
|
||||
parts -= 2;
|
||||
}
|
||||
|
||||
int remainder = 0;
|
||||
if (threads > 1)
|
||||
remainder = els - stripsize*parts;
|
||||
else
|
||||
@ -269,7 +234,6 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
||||
<< "\nstripsize " << stripsize
|
||||
<< "\n# of strips " << els/stripsize
|
||||
<< "\nremainder " << remainder << std::endl;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
nel1 = el1.size();
|
||||
@ -281,7 +245,7 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
||||
{
|
||||
int i, j, t, zspan, offs = 0;
|
||||
IntVec stripsizes[2], startelms[2];
|
||||
const BoolVec& elz = dir == 0 ? el1 : (dir == 1 ? el2 : el3);
|
||||
const BoolVec& elz = stripDir == U ? el1 : (stripDir == V ? el2 : el3);
|
||||
|
||||
stripsizes[0].resize(threads,stripsize);
|
||||
stripsizes[1].resize(threads,stripsize);
|
||||
@ -311,9 +275,9 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
||||
for (i = 0; i < 2; ++i) { // loop over groups
|
||||
tg[i].resize(threads);
|
||||
for (int t = 0; t < threads; ++t) { // loop over threads
|
||||
int maxx = dir == 0 ? stripsizes[i][t] : nel1;
|
||||
int maxy = dir == 1 ? stripsizes[i][t] : nel2;
|
||||
int maxz = dir == 2 ? stripsizes[i][t] : nel3;
|
||||
int maxx = stripDir == U ? stripsizes[i][t] : nel1;
|
||||
int maxy = stripDir == V ? stripsizes[i][t] : nel2;
|
||||
int maxz = stripDir == W ? stripsizes[i][t] : nel3;
|
||||
tg[i][t].reserve(maxx*maxy*maxz);
|
||||
for (int i3 = 0; i3 < maxz; ++i3)
|
||||
for (int i2 = 0; i2 < maxy; ++i2)
|
||||
@ -321,38 +285,34 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
||||
tg[i][t].push_back(startelms[i][t]+i1+nel1*(i2+nel2*i3));
|
||||
}
|
||||
|
||||
#if defined(USE_OPENMP) && SP_DEBUG > 1
|
||||
std::cout << "group " << i << std::endl;
|
||||
for (size_t j = 0; j < tg[i].size(); ++j) {
|
||||
std::cout << "\t thread " << j << " (" << tg[i][j].size() << "): ";
|
||||
for (size_t k = 0; k < tg[i][j].size(); ++k)
|
||||
std::cout << tg[i][j][k] << " ";
|
||||
std::cout << std::endl;
|
||||
}
|
||||
#if SP_DEBUG > 1
|
||||
printGroup(tg[i],i);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
|
||||
{
|
||||
int threads=1;
|
||||
int stripsize=0;
|
||||
int remainder=0;
|
||||
int i, dir=0, mul=1;
|
||||
#ifdef USE_OPENMP
|
||||
threads = omp_get_max_threads();
|
||||
#ifndef USE_OPENMP
|
||||
this->oneGroup(nel1*nel2*nel3);
|
||||
#else
|
||||
int threads = omp_get_max_threads();
|
||||
int parts = threads > 1 ? 2*threads : 1;
|
||||
dir = getStripDirection(nel1,nel2,nel3,parts);
|
||||
mul = dir == 0 ? 1 : nel1*(dir == 1 ? 1 : nel2);
|
||||
int els = dir == 0 ? nel1 : (dir == 1 ? nel2 : nel3);
|
||||
if (stripDir == ANY)
|
||||
stripDir = getStripDirection(nel1,nel2,nel3,parts);
|
||||
int mul = stripDir == U ? 1 : nel1*(stripDir == V ? 1 : nel2);
|
||||
int els = stripDir == U ? nel1 : (stripDir == V ? nel2 : nel3);
|
||||
|
||||
int stripsize = 0;
|
||||
while (threads > 1 && (stripsize = els/parts) < minsize) {
|
||||
threads --;
|
||||
parts -= 2;
|
||||
}
|
||||
|
||||
int remainder = 0;
|
||||
if (threads > 1)
|
||||
remainder = els - stripsize*parts;
|
||||
else
|
||||
@ -364,13 +324,13 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
|
||||
<< "\nstripsize " << stripsize
|
||||
<< "\n# of strips " << els/stripsize
|
||||
<< "\nremainder " << remainder << std::endl;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (threads == 1)
|
||||
this->oneGroup(nel1*nel2*nel3);
|
||||
else
|
||||
{
|
||||
int offs, i;
|
||||
IntVec stripsizes[2];
|
||||
stripsizes[0].resize(threads,stripsize);
|
||||
stripsizes[1].resize(threads,stripsize);
|
||||
@ -378,7 +338,7 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
|
||||
stripsizes[i%2][threads-(i+1)/2]++;
|
||||
|
||||
IntVec startelms[2];
|
||||
for (int offs = i = 0; i < threads; ++i) {
|
||||
for (offs = i = 0; i < threads; ++i) {
|
||||
startelms[0].push_back(offs*mul);
|
||||
offs += stripsizes[0][i];
|
||||
startelms[1].push_back(offs*mul);
|
||||
@ -388,9 +348,9 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
|
||||
for (i = 0; i < 2; ++i) { // loop over groups
|
||||
tg[i].resize(threads);
|
||||
for (int t = 0; t < threads; ++t) { // loop over threads
|
||||
int maxx = dir == 0 ? stripsizes[i][t] : nel1;
|
||||
int maxy = dir == 1 ? stripsizes[i][t] : nel2;
|
||||
int maxz = dir == 2 ? stripsizes[i][t] : nel3;
|
||||
int maxx = stripDir == U ? stripsizes[i][t] : nel1;
|
||||
int maxy = stripDir == V ? stripsizes[i][t] : nel2;
|
||||
int maxz = stripDir == W ? stripsizes[i][t] : nel3;
|
||||
for (int i3 = 0; i3 < maxz; ++i3)
|
||||
for (int i2 = 0; i2 < maxy; ++i2)
|
||||
for (int i1 = 0; i1 < maxx; ++i1)
|
||||
@ -398,20 +358,36 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
|
||||
}
|
||||
|
||||
#if defined(USE_OPENMP) && SP_DEBUG > 1
|
||||
std::cout << "group " << i << std::endl;
|
||||
for (size_t j = 0; j < tg[i].size(); ++j) {
|
||||
std::cout << "\t thread " << j << " (" << tg[i][j].size() << "): ";
|
||||
for (size_t k = 0; k < tg[i][j].size(); ++k)
|
||||
std::cout << tg[i][j][k] << " ";
|
||||
std::cout << std::endl;
|
||||
}
|
||||
printGroup(tg[i],i);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
int ThreadGroups::getStripDirection (int nel1, int nel2, int nel3, int parts)
|
||||
ThreadGroups::StripDirection ThreadGroups::getStripDirection (int nel1,
|
||||
int nel2,
|
||||
int parts)
|
||||
{
|
||||
int s1 = nel1 / parts;
|
||||
int s2 = nel2 / parts;
|
||||
int r1 = nel1 - s1*parts;
|
||||
int r2 = nel2 - s2*parts;
|
||||
|
||||
if (r1*nel2 < r2*nel1)
|
||||
return U;
|
||||
else if (r1*nel2 > r2*nel1)
|
||||
return V;
|
||||
|
||||
return nel1 > nel2 ? U : V;
|
||||
}
|
||||
|
||||
|
||||
ThreadGroups::StripDirection ThreadGroups::getStripDirection (int nel1,
|
||||
int nel2,
|
||||
int nel3,
|
||||
int parts)
|
||||
{
|
||||
int s1 = nel1 / parts;
|
||||
int s2 = nel2 / parts;
|
||||
@ -421,27 +397,27 @@ int ThreadGroups::getStripDirection (int nel1, int nel2, int nel3, int parts)
|
||||
int r3 = nel3 - s3*parts;
|
||||
|
||||
if (r1*nel2*nel3 < nel1*r2*nel3 && r1*nel2*nel3 < nel1*nel2*r3)
|
||||
return 0; // strips along x axis
|
||||
return U;
|
||||
else if (nel1*r2*nel3 < r1*nel2*nel3 && nel1*r2*nel3 < nel1*nel2*r3)
|
||||
return 1; // strips along y axis
|
||||
return V;
|
||||
else if (nel1*nel2*r3 < r1*nel2*nel3 && nel1*nel2*r3 < nel1*r2*nel3)
|
||||
return 2; // strips along z axis
|
||||
return W;
|
||||
|
||||
// The number of left-over elements is not smallest in one direction only
|
||||
if (r1*nel2*nel3 > nel1*r2*nel3)
|
||||
return nel2 > nel3 ? 1 : 2;
|
||||
return nel2 > nel3 ? V : W;
|
||||
else if (nel1*r2*nel3 > nel1*nel2*r3)
|
||||
return nel1 > nel3 ? 0 : 2;
|
||||
return nel1 > nel3 ? U : W;
|
||||
else if (nel1*nel2*r3 > r1*nel2*nel3)
|
||||
return nel1 > nel2 ? 0 : 1;
|
||||
return nel1 > nel2 ? U : V;
|
||||
|
||||
// The number of left-over elements is the same in all three directions
|
||||
if (nel1 >= nel2 && nel1 >= nel3)
|
||||
return 0;
|
||||
return U;
|
||||
else if (nel2 >= nel1 && nel2 >= nel3)
|
||||
return 1;
|
||||
return V;
|
||||
else
|
||||
return 2;
|
||||
return W;
|
||||
}
|
||||
|
||||
|
||||
@ -452,3 +428,15 @@ void ThreadGroups::applyMap (const IntVec& map)
|
||||
for (size_t j = 0; j < tg[l][k].size(); ++j)
|
||||
tg[l][k][j] = map[tg[l][k][j]];
|
||||
}
|
||||
|
||||
|
||||
void ThreadGroups::printGroup (const IntMat& group, int g)
|
||||
{
|
||||
std::cout <<"group "<< g;
|
||||
for (size_t t = 0; t < group.size(); t++)
|
||||
{
|
||||
std::cout <<"\n\t thread "<< t <<" ("<< group[t].size() <<"):";
|
||||
for (int e : group[t]) std::cout <<" "<< e;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
@ -29,6 +29,12 @@ class ThreadGroups
|
||||
typedef std::vector<IntVec> IntMat; //!< Element lists for all threads
|
||||
|
||||
public:
|
||||
//! Directions to consider for element stripes.
|
||||
enum StripDirection { U, V, W, ANY };
|
||||
|
||||
//! \brief Default constructor.
|
||||
ThreadGroups(StripDirection dir = ANY) : stripDir(dir) {}
|
||||
|
||||
//! \brief Calculates a 2D thread group partitioning based on strips.
|
||||
//! \param[in] el1 Flags non-zero knot spans in first parameter direction
|
||||
//! \param[in] el2 Flags non-zero knot spans in second parameter direction
|
||||
@ -74,9 +80,17 @@ public:
|
||||
|
||||
protected:
|
||||
//! \brief Calculates the parameter direction of the treading strips in 2D.
|
||||
static int getStripDirection(int nel1, int nel2, int parts);
|
||||
static StripDirection getStripDirection(int nel1, int nel2,
|
||||
int parts);
|
||||
//! \brief Calculates the parameter direction of the treading strips in 3D.
|
||||
static int getStripDirection(int nel1, int nel2, int nel3, int parts);
|
||||
static StripDirection getStripDirection(int nel1, int nel2, int nel3,
|
||||
int parts);
|
||||
|
||||
//! \brief Prints out a threading group definition.
|
||||
static void printGroup(const IntMat& group, int g);
|
||||
|
||||
public:
|
||||
StripDirection stripDir; //!< Actual direction to split elements
|
||||
|
||||
private:
|
||||
IntMat tg[2]; //!< Threading groups (always two, but the second may be empty)
|
||||
|
Loading…
Reference in New Issue
Block a user