added: strip along the optimal direction + spread remainder over several threads when possible
git-svn-id: http://svn.sintef.no/trondheim/IFEM/trunk@1445 e10b68d5-8a6e-419e-a041-bce267b0401d
This commit is contained in:
parent
7789611606
commit
6d49692360
@ -295,7 +295,7 @@ size_t utl::find_closest (const std::vector<real>& a, real v)
|
||||
}
|
||||
|
||||
|
||||
void utl::calcThreadGroups(int nel1, int nel2, utl::ThreadGroups& result)
|
||||
void utl::calcThreadGroups(int nel1, int nel2, ThreadGroups& result)
|
||||
{
|
||||
int threads=1;
|
||||
int groups=1;
|
||||
@ -306,17 +306,33 @@ void utl::calcThreadGroups(int nel1, int nel2, utl::ThreadGroups& result)
|
||||
if (threads > 1)
|
||||
groups = 2;
|
||||
|
||||
stripsize = nel1/(groups*threads);
|
||||
if (stripsize < 2) {
|
||||
int dir, els, mul;
|
||||
int s1 = nel1/(groups*threads);
|
||||
int s2 = nel2/(groups*threads);
|
||||
int r1 = nel1-(s1*groups*threads);
|
||||
int r2 = nel2-(s2*groups*threads);
|
||||
if (r1*nel2 < r2*nel1) {
|
||||
stripsize = s1;
|
||||
dir = 0;
|
||||
els = nel1;
|
||||
mul = 1;
|
||||
} else {
|
||||
stripsize = s2;
|
||||
els = nel2;
|
||||
dir = 1;
|
||||
mul = nel1;
|
||||
}
|
||||
|
||||
if (stripsize < 2 && groups > 1) {
|
||||
std::cerr << __FUNCTION__ << ": Warning: too many threads available." << std::endl
|
||||
<< "Reducing to a suitable amount" << std::endl;
|
||||
while (((stripsize = nel1/(groups*threads)) < 2) && threads > 1)
|
||||
while (((stripsize = els/(groups*threads)) < 2) && threads > 1)
|
||||
threads--;
|
||||
if (threads == 1)
|
||||
groups=1;
|
||||
stripsize = nel1/(groups*threads);
|
||||
stripsize = els/(groups*threads);
|
||||
}
|
||||
remainder = nel1-(stripsize*groups*threads);
|
||||
remainder = els-(stripsize*groups*threads);
|
||||
#endif
|
||||
result.resize(groups);
|
||||
|
||||
@ -325,7 +341,7 @@ void utl::calcThreadGroups(int nel1, int nel2, utl::ThreadGroups& result)
|
||||
std::cout << "nel1 " << nel1 << std::endl;
|
||||
std::cout << "nel2 " << nel2 << std::endl;
|
||||
std::cout << "stripsize " << stripsize << std::endl;
|
||||
std::cout << "# of strips " << nel1/stripsize << std::endl;
|
||||
std::cout << "# of strips " << els/stripsize << std::endl;
|
||||
std::cout << "remainder " << remainder << std::endl;
|
||||
#endif
|
||||
|
||||
@ -334,16 +350,36 @@ void utl::calcThreadGroups(int nel1, int nel2, utl::ThreadGroups& result)
|
||||
for (int i=0;i<nel1*nel2;++i)
|
||||
result[0][0].push_back(i);
|
||||
} else {
|
||||
std::vector< std::vector<int> > stripsizes;
|
||||
stripsizes.resize(2);
|
||||
stripsizes[0].resize(threads,stripsize);
|
||||
stripsizes[1].resize(threads,stripsize);
|
||||
int r=0;
|
||||
for (int i=0;i<remainder && r < remainder;++i) {
|
||||
stripsizes[1][threads-1-i]++;
|
||||
r++;
|
||||
if (r < remainder) {
|
||||
stripsizes[0][threads-1-i]++;
|
||||
r++;
|
||||
}
|
||||
}
|
||||
std::vector< std::vector<int> > startelms;
|
||||
startelms.resize(2);
|
||||
int offs=0;
|
||||
for (int i=0;i<threads;++i) {
|
||||
startelms[0].push_back(offs*mul);
|
||||
offs += stripsizes[0][i];
|
||||
startelms[1].push_back(offs*mul);
|
||||
offs += stripsizes[1][i];
|
||||
}
|
||||
for (size_t g=0;g<result.size();++g) { // loop over groups
|
||||
result[g].resize(threads);
|
||||
for (int t=0;t<threads;++t) { // loop over threads
|
||||
size_t startel = g*stripsize+result.size()*t*stripsize;
|
||||
int curstripsize = stripsize;
|
||||
if (t == threads-1 && g == result.size()-1)
|
||||
curstripsize += remainder;
|
||||
for (int i2=0; i2 < nel2; ++i2) { // loop in y direction
|
||||
for (int i1=0;i1<curstripsize; ++i1) {
|
||||
int iEl = startel+i1+i2*nel1;
|
||||
int maxx = dir==0?stripsizes[g][t]:nel1;
|
||||
int maxy = dir==1?stripsizes[g][t]:nel2;
|
||||
for (int i2=0; i2 < maxy; ++i2) { // loop in y direction
|
||||
for (int i1=0;i1<maxx; ++i1) {
|
||||
int iEl = startelms[g][t]+i1+i2*nel1;
|
||||
result[g][t].push_back(iEl);
|
||||
}
|
||||
}
|
||||
@ -376,17 +412,43 @@ void utl::calcThreadGroups(int nel1, int nel2, int nel3, ThreadGroups& result)
|
||||
if (threads > 1)
|
||||
groups = 2;
|
||||
|
||||
stripsize = nel1/(groups*threads);
|
||||
if (stripsize < 2) {
|
||||
int dir, els, mul;
|
||||
int s1 = nel1/(groups*threads);
|
||||
int s2 = nel2/(groups*threads);
|
||||
int s3 = nel3/(groups*threads);
|
||||
int r1 = nel1-(s1*groups*threads);
|
||||
int r2 = nel2-(s2*groups*threads);
|
||||
int r3 = nel3-(s3*groups*threads);
|
||||
if (r1*nel2*nel3 < r2*nel1*nel2 && r1*nel2*nel3 < r3*nel1*nel2 ) {
|
||||
// strips along x axis
|
||||
stripsize = s1;
|
||||
dir = 0;
|
||||
els = nel1;
|
||||
mul = 1;
|
||||
} else if (r2*nel1*nel3 < r1*nel2*nel3 && r2*nel1*nel3 < r3*nel1*nel2 ) {
|
||||
// strips along y axis
|
||||
stripsize = s2;
|
||||
els = nel2;
|
||||
dir = 1;
|
||||
mul = nel1;
|
||||
} else {
|
||||
// strips along z axis
|
||||
stripsize = s3;
|
||||
els = nel3;
|
||||
dir = 2;
|
||||
mul = nel1*nel2;
|
||||
}
|
||||
|
||||
if (stripsize < 2 && groups > 1) {
|
||||
std::cerr << __FUNCTION__ << ": Warning: too many threads available." << std::endl
|
||||
<< "Reducing to a suitable amount" << std::endl;
|
||||
while ((stripsize = nel1/(groups*threads)) < 2 && threads > 1)
|
||||
while (((stripsize = els/(groups*threads)) < 2) && threads > 1)
|
||||
threads--;
|
||||
if (threads == 1)
|
||||
groups=1;
|
||||
stripsize = nel1/(groups*threads);
|
||||
stripsize = els/(groups*threads);
|
||||
}
|
||||
remainder = nel1-(stripsize*groups*threads);
|
||||
remainder = els-(stripsize*groups*threads);
|
||||
#endif
|
||||
result.resize(groups);
|
||||
|
||||
@ -396,22 +458,49 @@ void utl::calcThreadGroups(int nel1, int nel2, int nel3, ThreadGroups& result)
|
||||
std::cout << "nel2 " << nel2 << std::endl;
|
||||
std::cout << "nel3 " << nel3 << std::endl;
|
||||
std::cout << "stripsize " << stripsize << std::endl;
|
||||
std::cout << "# of strips " << (stripsize?nel1/stripsize:0) << std::endl;
|
||||
std::cout << "# of strips " << els/stripsize << std::endl;
|
||||
std::cout << "remainder " << remainder << std::endl;
|
||||
#endif
|
||||
|
||||
for (size_t g=0;g<result.size();++g) { // loop over groups
|
||||
result[g].resize(threads);
|
||||
for (int t=0;t<threads;++t) { // loop over threads
|
||||
size_t startel = g*stripsize+result.size()*t*stripsize;
|
||||
int curstripsize = stripsize;
|
||||
if (t == threads-1 && g == result.size()-1)
|
||||
curstripsize += remainder;
|
||||
for (int i2=0; i2 < nel2; ++i2) { // loop in y direction
|
||||
for (int i3=0; i3 < nel3; ++i3) {
|
||||
for (int i1=0;i1<curstripsize; ++i1) {
|
||||
int iEl = startel+i1+i3*nel1*nel2+i2*nel1;
|
||||
result[g][t].push_back(iEl);
|
||||
if (groups == 1) {
|
||||
result[0].resize(1);
|
||||
for (int i=0;i<nel1*nel2*nel3;++i)
|
||||
result[0][0].push_back(i);
|
||||
} else {
|
||||
std::vector< std::vector<int> > stripsizes;
|
||||
stripsizes.resize(2);
|
||||
stripsizes[0].resize(threads,stripsize);
|
||||
stripsizes[1].resize(threads,stripsize);
|
||||
int r=0;
|
||||
for (int i=0;i<remainder && r < remainder;++i) {
|
||||
stripsizes[1][threads-1-i]++;
|
||||
r++;
|
||||
if (r < remainder) {
|
||||
stripsizes[0][threads-1-i]++;
|
||||
r++;
|
||||
}
|
||||
}
|
||||
std::vector< std::vector<int> > startelms;
|
||||
startelms.resize(2);
|
||||
int offs=0;
|
||||
for (int i=0;i<threads;++i) {
|
||||
startelms[0].push_back(offs*mul);
|
||||
offs += stripsizes[0][i];
|
||||
startelms[1].push_back(offs*mul);
|
||||
offs += stripsizes[1][i];
|
||||
}
|
||||
for (size_t g=0;g<result.size();++g) { // loop over groups
|
||||
result[g].resize(threads);
|
||||
for (int t=0;t<threads;++t) { // loop over threads
|
||||
int maxx = dir==0?stripsizes[g][t]:nel1;
|
||||
int maxy = dir==1?stripsizes[g][t]:nel2;
|
||||
int maxz = dir==2?stripsizes[g][t]:nel3;
|
||||
for (int i3=0; i3 < maxz; ++i3) {
|
||||
for (int i2=0; i2 < maxy; ++i2) { // loop in y direction
|
||||
for (int i1=0; i1< maxx; ++i1) {
|
||||
int iEl = startelms[g][t]+i1+i2*nel1+i3*nel1*nel2;
|
||||
result[g][t].push_back(iEl);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user