Added: Flag for forced the thread strip direction.
Added: Private method for debug print of group defintions. Changed: Put more code inside the ifdef USE_OPENMP.
This commit is contained in:
parent
598c09c4dd
commit
c8c0e6297c
@ -62,6 +62,41 @@ TEST(TestThreadGroups, Groups2D)
|
|||||||
CHECK_INTMATRICES_EQUAL(groups[0], "src/Utility/Test/refdata/ThreadGroups_2D_1.ref");
|
CHECK_INTMATRICES_EQUAL(groups[0], "src/Utility/Test/refdata/ThreadGroups_2D_1.ref");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
std::vector<bool> b14(4, true);
|
||||||
|
std::vector<bool> b24(4, true);
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; ++i) {
|
||||||
|
ThreadGroups group((ThreadGroups::StripDirection)i);
|
||||||
|
group.calcGroups(b14, b24, 1, 1);
|
||||||
|
#ifdef USE_OPENMP
|
||||||
|
const int ref1[4][4] = {{0,4,8,12}, {2,6,10,14},
|
||||||
|
{0,1,2, 3}, {8,9,10,11}};
|
||||||
|
|
||||||
|
const int ref2[4][4] = {{1,5,9,13}, { 3, 7,11,15},
|
||||||
|
{4,5,6, 7}, {12,13,14,15}};
|
||||||
|
|
||||||
|
ASSERT_EQ(group.size(), 2U);
|
||||||
|
ASSERT_EQ(group[0].size(), 2U);
|
||||||
|
ASSERT_EQ(group[1].size(), 2U);
|
||||||
|
ASSERT_EQ(group[0][0].size(), 4U);
|
||||||
|
ASSERT_EQ(group[0][1].size(), 4U);
|
||||||
|
ASSERT_EQ(group[1][0].size(), 4U);
|
||||||
|
ASSERT_EQ(group[1][1].size(), 4U);
|
||||||
|
for (size_t j = 0; j < 4; ++j) {
|
||||||
|
ASSERT_EQ(group[0][0][j], ref1[i*2][j]);
|
||||||
|
ASSERT_EQ(group[0][1][j], ref1[i*2+1][j]);
|
||||||
|
ASSERT_EQ(group[1][0][j], ref2[i*2][j]);
|
||||||
|
ASSERT_EQ(group[1][1][j], ref2[i*2+1][j]);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
ASSERT_EQ(group.size(), 1U);
|
||||||
|
ASSERT_EQ(group[0].size(), 1U);
|
||||||
|
ASSERT_EQ(group[0][0].size(), 16U);
|
||||||
|
for (int j = 0; j < 16; ++j)
|
||||||
|
ASSERT_EQ(group[0][0][j], j);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef USE_OPENMP
|
#ifdef USE_OPENMP
|
||||||
omp_set_num_threads(3);
|
omp_set_num_threads(3);
|
||||||
#endif
|
#endif
|
||||||
|
@ -13,9 +13,7 @@
|
|||||||
|
|
||||||
#include "ThreadGroups.h"
|
#include "ThreadGroups.h"
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#if SP_DEBUG > 1
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#endif
|
|
||||||
#ifdef USE_OPENMP
|
#ifdef USE_OPENMP
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#endif
|
#endif
|
||||||
@ -33,33 +31,32 @@ void ThreadGroups::oneGroup (size_t nel)
|
|||||||
void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
||||||
int p1, int p2)
|
int p1, int p2)
|
||||||
{
|
{
|
||||||
|
#ifndef USE_OPENMP
|
||||||
|
this->oneGroup(el1.size()*el2.size());
|
||||||
|
#else
|
||||||
// Count the non-zero element in each direction, the zero-span elements
|
// Count the non-zero element in each direction, the zero-span elements
|
||||||
// should not affect the partitioning as they don't involve any work
|
// should not affect the partitioning as they don't involve any work
|
||||||
size_t i, nel1 = 0, nel2 = 0;
|
size_t nel1 = 0, nel2 = 0;
|
||||||
for (i = 0; i < el1.size(); i++)
|
for (bool e : el1) if (e) nel1++;
|
||||||
if (el1[i]) nel1++;
|
for (bool e : el2) if (e) nel2++;
|
||||||
for (i = 0; i < el2.size(); i++)
|
|
||||||
if (el2[i]) nel2++;
|
|
||||||
|
|
||||||
int threads=1;
|
int threads = omp_get_max_threads();
|
||||||
int stripsize=0;
|
|
||||||
int remainder=0;
|
|
||||||
int dir=0, mul=1;
|
|
||||||
#ifdef USE_OPENMP
|
|
||||||
threads = omp_get_max_threads();
|
|
||||||
int parts = threads > 1 ? 2*threads : 1;
|
int parts = threads > 1 ? 2*threads : 1;
|
||||||
dir = getStripDirection(nel1,nel2,parts);
|
if (stripDir == ANY)
|
||||||
mul = dir == 0 ? 1 : el1.size();
|
stripDir = getStripDirection(nel1,nel2,parts);
|
||||||
int els = dir == 0 ? nel1 : nel2;
|
int mul = stripDir == U ? 1 : el1.size();
|
||||||
|
int els = stripDir == U ? nel1 : nel2;
|
||||||
|
|
||||||
// The minimum strip size (with) depends on the polynomial degree
|
// The minimum strip size (width) depends on the polynomial degree
|
||||||
// due to the overlapping support of the splines basis functions
|
// due to the overlapping support of the splines basis functions
|
||||||
int minsize = dir == 0 ? p1 : p2;
|
int stripsize = 0;
|
||||||
|
int minsize = stripDir == U ? p1 : p2;
|
||||||
while (threads > 1 && (stripsize = els/parts) < minsize) {
|
while (threads > 1 && (stripsize = els/parts) < minsize) {
|
||||||
threads --;
|
threads --;
|
||||||
parts -= 2;
|
parts -= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int remainder = 0;
|
||||||
if (threads > 1)
|
if (threads > 1)
|
||||||
remainder = els - stripsize*parts;
|
remainder = els - stripsize*parts;
|
||||||
else
|
else
|
||||||
@ -71,7 +68,6 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
|||||||
<< "\nstripsize " << stripsize
|
<< "\nstripsize " << stripsize
|
||||||
<< "\n# of strips " << els/stripsize
|
<< "\n# of strips " << els/stripsize
|
||||||
<< "\nremainder " << remainder << std::endl;
|
<< "\nremainder " << remainder << std::endl;
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
nel1 = el1.size();
|
nel1 = el1.size();
|
||||||
@ -82,7 +78,7 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
|||||||
{
|
{
|
||||||
int i, j, t, zspan, offs = 0;
|
int i, j, t, zspan, offs = 0;
|
||||||
IntVec stripsizes[2], startelms[2];
|
IntVec stripsizes[2], startelms[2];
|
||||||
const BoolVec& elz = dir == 0 ? el1 : el2;
|
const BoolVec& elz = stripDir == U ? el1 : el2;
|
||||||
|
|
||||||
stripsizes[0].resize(threads,stripsize);
|
stripsizes[0].resize(threads,stripsize);
|
||||||
stripsizes[1].resize(threads,stripsize);
|
stripsizes[1].resize(threads,stripsize);
|
||||||
@ -112,45 +108,41 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
|||||||
for (i = 0; i < 2; ++i) { // loop over groups
|
for (i = 0; i < 2; ++i) { // loop over groups
|
||||||
tg[i].resize(threads);
|
tg[i].resize(threads);
|
||||||
for (int t = 0; t < threads; ++t) { // loop over threads
|
for (int t = 0; t < threads; ++t) { // loop over threads
|
||||||
int maxx = dir == 0 ? stripsizes[i][t] : nel1;
|
int maxx = stripDir == U ? stripsizes[i][t] : nel1;
|
||||||
int maxy = dir == 1 ? stripsizes[i][t] : nel2;
|
int maxy = stripDir == V ? stripsizes[i][t] : nel2;
|
||||||
tg[i][t].reserve(maxx*maxy);
|
tg[i][t].reserve(maxx*maxy);
|
||||||
for (int i2 = 0; i2 < maxy; ++i2)
|
for (int i2 = 0; i2 < maxy; ++i2)
|
||||||
for (int i1 = 0; i1 < maxx; ++i1)
|
for (int i1 = 0; i1 < maxx; ++i1)
|
||||||
tg[i][t].push_back(startelms[i][t]+i1+i2*nel1);
|
tg[i][t].push_back(startelms[i][t]+i1+i2*nel1);
|
||||||
}
|
}
|
||||||
#if defined(USE_OPENMP) && SP_DEBUG > 1
|
#if SP_DEBUG > 1
|
||||||
std::cout << "group " << i << std::endl;
|
printGroup(tg[i],i);
|
||||||
for (size_t j = 0; j < tg[i].size(); ++j) {
|
|
||||||
std::cout << "\t thread " << j << ": ";
|
|
||||||
for (size_t k = 0; k < tg[i][j].size(); ++k)
|
|
||||||
std::cout << tg[i][j][k] << " ";
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ThreadGroups::calcGroups (int nel1, int nel2, int minsize)
|
void ThreadGroups::calcGroups (int nel1, int nel2, int minsize)
|
||||||
{
|
{
|
||||||
int threads=1;
|
#ifndef USE_OPENMP
|
||||||
int stripsize=0;
|
this->oneGroup(nel1*nel2);
|
||||||
int remainder=0;
|
#else
|
||||||
int dir=0, mul=1;
|
int threads = omp_get_max_threads();
|
||||||
#ifdef USE_OPENMP
|
|
||||||
threads = omp_get_max_threads();
|
|
||||||
int parts = threads > 1 ? 2*threads : 1;
|
int parts = threads > 1 ? 2*threads : 1;
|
||||||
dir = getStripDirection(nel1,nel2,parts);
|
if (stripDir == ANY)
|
||||||
mul = dir == 0 ? 1 : nel1;
|
stripDir = getStripDirection(nel1,nel2,parts);
|
||||||
int els = dir == 0 ? nel1 : nel2;
|
int mul = stripDir == U ? 1 : nel1;
|
||||||
|
int els = stripDir == U ? nel1 : nel2;
|
||||||
|
|
||||||
|
int stripsize = 0;
|
||||||
while (threads > 1 && (stripsize = els/parts) < minsize) {
|
while (threads > 1 && (stripsize = els/parts) < minsize) {
|
||||||
threads --;
|
threads --;
|
||||||
parts -= 2;
|
parts -= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int remainder = 0;
|
||||||
if (threads > 1)
|
if (threads > 1)
|
||||||
remainder = els - stripsize*parts;
|
remainder = els - stripsize*parts;
|
||||||
else
|
else
|
||||||
@ -162,7 +154,6 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int minsize)
|
|||||||
<< "\nstripsize " << stripsize
|
<< "\nstripsize " << stripsize
|
||||||
<< "\n# of strips " << els/stripsize
|
<< "\n# of strips " << els/stripsize
|
||||||
<< "\nremainder " << remainder << std::endl;
|
<< "\nremainder " << remainder << std::endl;
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (threads == 1)
|
if (threads == 1)
|
||||||
@ -187,77 +178,51 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int minsize)
|
|||||||
for (i = 0; i < 2; ++i) { // loop over groups
|
for (i = 0; i < 2; ++i) { // loop over groups
|
||||||
tg[i].resize(threads);
|
tg[i].resize(threads);
|
||||||
for (int t = 0; t < threads; ++t) { // loop over threads
|
for (int t = 0; t < threads; ++t) { // loop over threads
|
||||||
int maxx = dir == 0 ? stripsizes[i][t] : nel1;
|
int maxx = stripDir == U ? stripsizes[i][t] : nel1;
|
||||||
int maxy = dir == 1 ? stripsizes[i][t] : nel2;
|
int maxy = stripDir == V ? stripsizes[i][t] : nel2;
|
||||||
for (int i2 = 0; i2 < maxy; ++i2)
|
for (int i2 = 0; i2 < maxy; ++i2)
|
||||||
for (int i1 = 0; i1 < maxx; ++i1)
|
for (int i1 = 0; i1 < maxx; ++i1)
|
||||||
tg[i][t].push_back(startelms[i][t]+i1+i2*nel1);
|
tg[i][t].push_back(startelms[i][t]+i1+i2*nel1);
|
||||||
}
|
}
|
||||||
#if defined(USE_OPENMP) && SP_DEBUG > 1
|
#if defined(USE_OPENMP) && SP_DEBUG > 1
|
||||||
std::cout << "group " << i << std::endl;
|
printGroup(tg[i],i);
|
||||||
for (size_t j = 0; j < tg[i].size(); ++j) {
|
|
||||||
std::cout << "\t thread " << j << ": ";
|
|
||||||
for (size_t k = 0; k < tg[i][j].size(); ++k)
|
|
||||||
std::cout << tg[i][j][k] << " ";
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
#endif
|
||||||
|
|
||||||
|
|
||||||
int ThreadGroups::getStripDirection (int nel1, int nel2, int parts)
|
|
||||||
{
|
|
||||||
int s1 = nel1 / parts;
|
|
||||||
int s2 = nel2 / parts;
|
|
||||||
int r1 = nel1 - s1*parts;
|
|
||||||
int r2 = nel2 - s2*parts;
|
|
||||||
|
|
||||||
if (r1*nel2 < r2*nel1)
|
|
||||||
return 0; // strips in u-direction
|
|
||||||
else if (r1*nel2 > r2*nel1)
|
|
||||||
return 1; // strips in v-direction
|
|
||||||
|
|
||||||
if (nel1 > nel2)
|
|
||||||
return 0; // strips in u-direction
|
|
||||||
else
|
|
||||||
return 1; // strips in v-direction
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
||||||
const BoolVec& el3, int p1, int p2, int p3)
|
const BoolVec& el3, int p1, int p2, int p3)
|
||||||
{
|
{
|
||||||
|
#ifndef USE_OPENMP
|
||||||
|
this->oneGroup(el1.size()*el2.size()*el3.size());
|
||||||
|
#else
|
||||||
// Count the non-zero element in each direction, the zero-span elements
|
// Count the non-zero element in each direction, the zero-span elements
|
||||||
// should not affect the partitioning as they don't involve any work
|
// should not affect the partitioning as they don't involve any work
|
||||||
size_t i, nel1 = 0, nel2 = 0, nel3 = 0;
|
size_t nel1 = 0, nel2 = 0, nel3 = 0;
|
||||||
for (i = 0; i < el1.size(); i++)
|
for (bool e : el1) if (e) nel1++;
|
||||||
if (el1[i]) nel1++;
|
for (bool e : el2) if (e) nel2++;
|
||||||
for (i = 0; i < el2.size(); i++)
|
for (bool e : el3) if (e) nel3++;
|
||||||
if (el2[i]) nel2++;
|
|
||||||
for (i = 0; i < el3.size(); i++)
|
|
||||||
if (el3[i]) nel3++;
|
|
||||||
|
|
||||||
int threads=1;
|
int threads = omp_get_max_threads();
|
||||||
int stripsize=0;
|
|
||||||
int remainder=0;
|
|
||||||
int dir=0, mul=1;
|
|
||||||
#ifdef USE_OPENMP
|
|
||||||
threads = omp_get_max_threads();
|
|
||||||
int parts = threads > 1 ? 2*threads : 1;
|
int parts = threads > 1 ? 2*threads : 1;
|
||||||
dir = getStripDirection(nel1,nel2,nel3,parts);
|
if (stripDir == ANY)
|
||||||
mul = dir == 0 ? 1 : el1.size()*(dir == 1 ? 1 : el2.size());
|
stripDir = getStripDirection(nel1,nel2,nel3,parts);
|
||||||
int els = dir == 0 ? nel1 : (dir == 1 ? nel2 : nel3);
|
int mul = stripDir == U ? 1 : el1.size()*(stripDir == V ? 1 : el2.size());
|
||||||
|
int els = stripDir == U ? nel1 : (stripDir == V ? nel2 : nel3);
|
||||||
|
|
||||||
// The minimum strip size (with) depends on the polynomial degree
|
// The minimum strip size (width) depends on the polynomial degree
|
||||||
// due to the overlapping support of the splines basis functions
|
// due to the overlapping support of the splines basis functions
|
||||||
int minsize = dir == 0 ? p1 : (dir == 1 ? p2 : p3);
|
int stripsize = 0;
|
||||||
|
int minsize = stripDir == U ? p1 : (stripDir == V ? p2 : p3);
|
||||||
while (threads > 1 && (stripsize = els/parts) < minsize) {
|
while (threads > 1 && (stripsize = els/parts) < minsize) {
|
||||||
threads --;
|
threads --;
|
||||||
parts -= 2;
|
parts -= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int remainder = 0;
|
||||||
if (threads > 1)
|
if (threads > 1)
|
||||||
remainder = els - stripsize*parts;
|
remainder = els - stripsize*parts;
|
||||||
else
|
else
|
||||||
@ -269,7 +234,6 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
|||||||
<< "\nstripsize " << stripsize
|
<< "\nstripsize " << stripsize
|
||||||
<< "\n# of strips " << els/stripsize
|
<< "\n# of strips " << els/stripsize
|
||||||
<< "\nremainder " << remainder << std::endl;
|
<< "\nremainder " << remainder << std::endl;
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
nel1 = el1.size();
|
nel1 = el1.size();
|
||||||
@ -281,7 +245,7 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
|||||||
{
|
{
|
||||||
int i, j, t, zspan, offs = 0;
|
int i, j, t, zspan, offs = 0;
|
||||||
IntVec stripsizes[2], startelms[2];
|
IntVec stripsizes[2], startelms[2];
|
||||||
const BoolVec& elz = dir == 0 ? el1 : (dir == 1 ? el2 : el3);
|
const BoolVec& elz = stripDir == U ? el1 : (stripDir == V ? el2 : el3);
|
||||||
|
|
||||||
stripsizes[0].resize(threads,stripsize);
|
stripsizes[0].resize(threads,stripsize);
|
||||||
stripsizes[1].resize(threads,stripsize);
|
stripsizes[1].resize(threads,stripsize);
|
||||||
@ -311,9 +275,9 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
|||||||
for (i = 0; i < 2; ++i) { // loop over groups
|
for (i = 0; i < 2; ++i) { // loop over groups
|
||||||
tg[i].resize(threads);
|
tg[i].resize(threads);
|
||||||
for (int t = 0; t < threads; ++t) { // loop over threads
|
for (int t = 0; t < threads; ++t) { // loop over threads
|
||||||
int maxx = dir == 0 ? stripsizes[i][t] : nel1;
|
int maxx = stripDir == U ? stripsizes[i][t] : nel1;
|
||||||
int maxy = dir == 1 ? stripsizes[i][t] : nel2;
|
int maxy = stripDir == V ? stripsizes[i][t] : nel2;
|
||||||
int maxz = dir == 2 ? stripsizes[i][t] : nel3;
|
int maxz = stripDir == W ? stripsizes[i][t] : nel3;
|
||||||
tg[i][t].reserve(maxx*maxy*maxz);
|
tg[i][t].reserve(maxx*maxy*maxz);
|
||||||
for (int i3 = 0; i3 < maxz; ++i3)
|
for (int i3 = 0; i3 < maxz; ++i3)
|
||||||
for (int i2 = 0; i2 < maxy; ++i2)
|
for (int i2 = 0; i2 < maxy; ++i2)
|
||||||
@ -321,38 +285,34 @@ void ThreadGroups::calcGroups (const BoolVec& el1, const BoolVec& el2,
|
|||||||
tg[i][t].push_back(startelms[i][t]+i1+nel1*(i2+nel2*i3));
|
tg[i][t].push_back(startelms[i][t]+i1+nel1*(i2+nel2*i3));
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(USE_OPENMP) && SP_DEBUG > 1
|
#if SP_DEBUG > 1
|
||||||
std::cout << "group " << i << std::endl;
|
printGroup(tg[i],i);
|
||||||
for (size_t j = 0; j < tg[i].size(); ++j) {
|
|
||||||
std::cout << "\t thread " << j << " (" << tg[i][j].size() << "): ";
|
|
||||||
for (size_t k = 0; k < tg[i][j].size(); ++k)
|
|
||||||
std::cout << tg[i][j][k] << " ";
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
|
void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
|
||||||
{
|
{
|
||||||
int threads=1;
|
#ifndef USE_OPENMP
|
||||||
int stripsize=0;
|
this->oneGroup(nel1*nel2*nel3);
|
||||||
int remainder=0;
|
#else
|
||||||
int i, dir=0, mul=1;
|
int threads = omp_get_max_threads();
|
||||||
#ifdef USE_OPENMP
|
|
||||||
threads = omp_get_max_threads();
|
|
||||||
int parts = threads > 1 ? 2*threads : 1;
|
int parts = threads > 1 ? 2*threads : 1;
|
||||||
dir = getStripDirection(nel1,nel2,nel3,parts);
|
if (stripDir == ANY)
|
||||||
mul = dir == 0 ? 1 : nel1*(dir == 1 ? 1 : nel2);
|
stripDir = getStripDirection(nel1,nel2,nel3,parts);
|
||||||
int els = dir == 0 ? nel1 : (dir == 1 ? nel2 : nel3);
|
int mul = stripDir == U ? 1 : nel1*(stripDir == V ? 1 : nel2);
|
||||||
|
int els = stripDir == U ? nel1 : (stripDir == V ? nel2 : nel3);
|
||||||
|
|
||||||
|
int stripsize = 0;
|
||||||
while (threads > 1 && (stripsize = els/parts) < minsize) {
|
while (threads > 1 && (stripsize = els/parts) < minsize) {
|
||||||
threads --;
|
threads --;
|
||||||
parts -= 2;
|
parts -= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int remainder = 0;
|
||||||
if (threads > 1)
|
if (threads > 1)
|
||||||
remainder = els - stripsize*parts;
|
remainder = els - stripsize*parts;
|
||||||
else
|
else
|
||||||
@ -364,13 +324,13 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
|
|||||||
<< "\nstripsize " << stripsize
|
<< "\nstripsize " << stripsize
|
||||||
<< "\n# of strips " << els/stripsize
|
<< "\n# of strips " << els/stripsize
|
||||||
<< "\nremainder " << remainder << std::endl;
|
<< "\nremainder " << remainder << std::endl;
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (threads == 1)
|
if (threads == 1)
|
||||||
this->oneGroup(nel1*nel2*nel3);
|
this->oneGroup(nel1*nel2*nel3);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
int offs, i;
|
||||||
IntVec stripsizes[2];
|
IntVec stripsizes[2];
|
||||||
stripsizes[0].resize(threads,stripsize);
|
stripsizes[0].resize(threads,stripsize);
|
||||||
stripsizes[1].resize(threads,stripsize);
|
stripsizes[1].resize(threads,stripsize);
|
||||||
@ -378,7 +338,7 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
|
|||||||
stripsizes[i%2][threads-(i+1)/2]++;
|
stripsizes[i%2][threads-(i+1)/2]++;
|
||||||
|
|
||||||
IntVec startelms[2];
|
IntVec startelms[2];
|
||||||
for (int offs = i = 0; i < threads; ++i) {
|
for (offs = i = 0; i < threads; ++i) {
|
||||||
startelms[0].push_back(offs*mul);
|
startelms[0].push_back(offs*mul);
|
||||||
offs += stripsizes[0][i];
|
offs += stripsizes[0][i];
|
||||||
startelms[1].push_back(offs*mul);
|
startelms[1].push_back(offs*mul);
|
||||||
@ -388,9 +348,9 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
|
|||||||
for (i = 0; i < 2; ++i) { // loop over groups
|
for (i = 0; i < 2; ++i) { // loop over groups
|
||||||
tg[i].resize(threads);
|
tg[i].resize(threads);
|
||||||
for (int t = 0; t < threads; ++t) { // loop over threads
|
for (int t = 0; t < threads; ++t) { // loop over threads
|
||||||
int maxx = dir == 0 ? stripsizes[i][t] : nel1;
|
int maxx = stripDir == U ? stripsizes[i][t] : nel1;
|
||||||
int maxy = dir == 1 ? stripsizes[i][t] : nel2;
|
int maxy = stripDir == V ? stripsizes[i][t] : nel2;
|
||||||
int maxz = dir == 2 ? stripsizes[i][t] : nel3;
|
int maxz = stripDir == W ? stripsizes[i][t] : nel3;
|
||||||
for (int i3 = 0; i3 < maxz; ++i3)
|
for (int i3 = 0; i3 < maxz; ++i3)
|
||||||
for (int i2 = 0; i2 < maxy; ++i2)
|
for (int i2 = 0; i2 < maxy; ++i2)
|
||||||
for (int i1 = 0; i1 < maxx; ++i1)
|
for (int i1 = 0; i1 < maxx; ++i1)
|
||||||
@ -398,20 +358,36 @@ void ThreadGroups::calcGroups (int nel1, int nel2, int nel3, int minsize)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined(USE_OPENMP) && SP_DEBUG > 1
|
#if defined(USE_OPENMP) && SP_DEBUG > 1
|
||||||
std::cout << "group " << i << std::endl;
|
printGroup(tg[i],i);
|
||||||
for (size_t j = 0; j < tg[i].size(); ++j) {
|
|
||||||
std::cout << "\t thread " << j << " (" << tg[i][j].size() << "): ";
|
|
||||||
for (size_t k = 0; k < tg[i][j].size(); ++k)
|
|
||||||
std::cout << tg[i][j][k] << " ";
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int ThreadGroups::getStripDirection (int nel1, int nel2, int nel3, int parts)
|
ThreadGroups::StripDirection ThreadGroups::getStripDirection (int nel1,
|
||||||
|
int nel2,
|
||||||
|
int parts)
|
||||||
|
{
|
||||||
|
int s1 = nel1 / parts;
|
||||||
|
int s2 = nel2 / parts;
|
||||||
|
int r1 = nel1 - s1*parts;
|
||||||
|
int r2 = nel2 - s2*parts;
|
||||||
|
|
||||||
|
if (r1*nel2 < r2*nel1)
|
||||||
|
return U;
|
||||||
|
else if (r1*nel2 > r2*nel1)
|
||||||
|
return V;
|
||||||
|
|
||||||
|
return nel1 > nel2 ? U : V;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
ThreadGroups::StripDirection ThreadGroups::getStripDirection (int nel1,
|
||||||
|
int nel2,
|
||||||
|
int nel3,
|
||||||
|
int parts)
|
||||||
{
|
{
|
||||||
int s1 = nel1 / parts;
|
int s1 = nel1 / parts;
|
||||||
int s2 = nel2 / parts;
|
int s2 = nel2 / parts;
|
||||||
@ -421,27 +397,27 @@ int ThreadGroups::getStripDirection (int nel1, int nel2, int nel3, int parts)
|
|||||||
int r3 = nel3 - s3*parts;
|
int r3 = nel3 - s3*parts;
|
||||||
|
|
||||||
if (r1*nel2*nel3 < nel1*r2*nel3 && r1*nel2*nel3 < nel1*nel2*r3)
|
if (r1*nel2*nel3 < nel1*r2*nel3 && r1*nel2*nel3 < nel1*nel2*r3)
|
||||||
return 0; // strips along x axis
|
return U;
|
||||||
else if (nel1*r2*nel3 < r1*nel2*nel3 && nel1*r2*nel3 < nel1*nel2*r3)
|
else if (nel1*r2*nel3 < r1*nel2*nel3 && nel1*r2*nel3 < nel1*nel2*r3)
|
||||||
return 1; // strips along y axis
|
return V;
|
||||||
else if (nel1*nel2*r3 < r1*nel2*nel3 && nel1*nel2*r3 < nel1*r2*nel3)
|
else if (nel1*nel2*r3 < r1*nel2*nel3 && nel1*nel2*r3 < nel1*r2*nel3)
|
||||||
return 2; // strips along z axis
|
return W;
|
||||||
|
|
||||||
// The number of left-over elements is not smallest in one direction only
|
// The number of left-over elements is not smallest in one direction only
|
||||||
if (r1*nel2*nel3 > nel1*r2*nel3)
|
if (r1*nel2*nel3 > nel1*r2*nel3)
|
||||||
return nel2 > nel3 ? 1 : 2;
|
return nel2 > nel3 ? V : W;
|
||||||
else if (nel1*r2*nel3 > nel1*nel2*r3)
|
else if (nel1*r2*nel3 > nel1*nel2*r3)
|
||||||
return nel1 > nel3 ? 0 : 2;
|
return nel1 > nel3 ? U : W;
|
||||||
else if (nel1*nel2*r3 > r1*nel2*nel3)
|
else if (nel1*nel2*r3 > r1*nel2*nel3)
|
||||||
return nel1 > nel2 ? 0 : 1;
|
return nel1 > nel2 ? U : V;
|
||||||
|
|
||||||
// The number of left-over elements is the same in all three directions
|
// The number of left-over elements is the same in all three directions
|
||||||
if (nel1 >= nel2 && nel1 >= nel3)
|
if (nel1 >= nel2 && nel1 >= nel3)
|
||||||
return 0;
|
return U;
|
||||||
else if (nel2 >= nel1 && nel2 >= nel3)
|
else if (nel2 >= nel1 && nel2 >= nel3)
|
||||||
return 1;
|
return V;
|
||||||
else
|
else
|
||||||
return 2;
|
return W;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -452,3 +428,15 @@ void ThreadGroups::applyMap (const IntVec& map)
|
|||||||
for (size_t j = 0; j < tg[l][k].size(); ++j)
|
for (size_t j = 0; j < tg[l][k].size(); ++j)
|
||||||
tg[l][k][j] = map[tg[l][k][j]];
|
tg[l][k][j] = map[tg[l][k][j]];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void ThreadGroups::printGroup (const IntMat& group, int g)
|
||||||
|
{
|
||||||
|
std::cout <<"group "<< g;
|
||||||
|
for (size_t t = 0; t < group.size(); t++)
|
||||||
|
{
|
||||||
|
std::cout <<"\n\t thread "<< t <<" ("<< group[t].size() <<"):";
|
||||||
|
for (int e : group[t]) std::cout <<" "<< e;
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
@ -29,6 +29,12 @@ class ThreadGroups
|
|||||||
typedef std::vector<IntVec> IntMat; //!< Element lists for all threads
|
typedef std::vector<IntVec> IntMat; //!< Element lists for all threads
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
//! Directions to consider for element stripes.
|
||||||
|
enum StripDirection { U, V, W, ANY };
|
||||||
|
|
||||||
|
//! \brief Default constructor.
|
||||||
|
ThreadGroups(StripDirection dir = ANY) : stripDir(dir) {}
|
||||||
|
|
||||||
//! \brief Calculates a 2D thread group partitioning based on strips.
|
//! \brief Calculates a 2D thread group partitioning based on strips.
|
||||||
//! \param[in] el1 Flags non-zero knot spans in first parameter direction
|
//! \param[in] el1 Flags non-zero knot spans in first parameter direction
|
||||||
//! \param[in] el2 Flags non-zero knot spans in second parameter direction
|
//! \param[in] el2 Flags non-zero knot spans in second parameter direction
|
||||||
@ -74,9 +80,17 @@ public:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
//! \brief Calculates the parameter direction of the treading strips in 2D.
|
//! \brief Calculates the parameter direction of the treading strips in 2D.
|
||||||
static int getStripDirection(int nel1, int nel2, int parts);
|
static StripDirection getStripDirection(int nel1, int nel2,
|
||||||
|
int parts);
|
||||||
//! \brief Calculates the parameter direction of the treading strips in 3D.
|
//! \brief Calculates the parameter direction of the treading strips in 3D.
|
||||||
static int getStripDirection(int nel1, int nel2, int nel3, int parts);
|
static StripDirection getStripDirection(int nel1, int nel2, int nel3,
|
||||||
|
int parts);
|
||||||
|
|
||||||
|
//! \brief Prints out a threading group definition.
|
||||||
|
static void printGroup(const IntMat& group, int g);
|
||||||
|
|
||||||
|
public:
|
||||||
|
StripDirection stripDir; //!< Actual direction to split elements
|
||||||
|
|
||||||
private:
|
private:
|
||||||
IntMat tg[2]; //!< Threading groups (always two, but the second may be empty)
|
IntMat tg[2]; //!< Threading groups (always two, but the second may be empty)
|
||||||
|
Loading…
Reference in New Issue
Block a user