From 6d49692360a8cef3bcf16a5d87fdcd599938c007 Mon Sep 17 00:00:00 2001 From: akva Date: Wed, 1 Feb 2012 13:18:07 +0000 Subject: [PATCH] added: strip along the optimal direction + spread remainder over several threads when possible git-svn-id: http://svn.sintef.no/trondheim/IFEM/trunk@1445 e10b68d5-8a6e-419e-a041-bce267b0401d --- src/Utility/Utilities.C | 153 +++++++++++++++++++++++++++++++--------- 1 file changed, 121 insertions(+), 32 deletions(-) diff --git a/src/Utility/Utilities.C b/src/Utility/Utilities.C index b9f09d92..f12edac2 100644 --- a/src/Utility/Utilities.C +++ b/src/Utility/Utilities.C @@ -295,7 +295,7 @@ size_t utl::find_closest (const std::vector& a, real v) } -void utl::calcThreadGroups(int nel1, int nel2, utl::ThreadGroups& result) +void utl::calcThreadGroups(int nel1, int nel2, ThreadGroups& result) { int threads=1; int groups=1; @@ -306,17 +306,33 @@ void utl::calcThreadGroups(int nel1, int nel2, utl::ThreadGroups& result) if (threads > 1) groups = 2; - stripsize = nel1/(groups*threads); - if (stripsize < 2) { + int dir, els, mul; + int s1 = nel1/(groups*threads); + int s2 = nel2/(groups*threads); + int r1 = nel1-(s1*groups*threads); + int r2 = nel2-(s2*groups*threads); + if (r1*nel2 < r2*nel1) { + stripsize = s1; + dir = 0; + els = nel1; + mul = 1; + } else { + stripsize = s2; + els = nel2; + dir = 1; + mul = nel1; + } + + if (stripsize < 2 && groups > 1) { std::cerr << __FUNCTION__ << ": Warning: too many threads available." << std::endl << "Reducing to a suitable amount" << std::endl; - while (((stripsize = nel1/(groups*threads)) < 2) && threads > 1) + while (((stripsize = els/(groups*threads)) < 2) && threads > 1) threads--; if (threads == 1) groups=1; - stripsize = nel1/(groups*threads); + stripsize = els/(groups*threads); } - remainder = nel1-(stripsize*groups*threads); + remainder = els-(stripsize*groups*threads); #endif result.resize(groups); @@ -325,7 +341,7 @@ void utl::calcThreadGroups(int nel1, int nel2, utl::ThreadGroups& result) std::cout << "nel1 " << nel1 << std::endl; std::cout << "nel2 " << nel2 << std::endl; std::cout << "stripsize " << stripsize << std::endl; - std::cout << "# of strips " << nel1/stripsize << std::endl; + std::cout << "# of strips " << els/stripsize << std::endl; std::cout << "remainder " << remainder << std::endl; #endif @@ -334,16 +350,36 @@ void utl::calcThreadGroups(int nel1, int nel2, utl::ThreadGroups& result) for (int i=0;i > stripsizes; + stripsizes.resize(2); + stripsizes[0].resize(threads,stripsize); + stripsizes[1].resize(threads,stripsize); + int r=0; + for (int i=0;i > startelms; + startelms.resize(2); + int offs=0; + for (int i=0;i 1) groups = 2; - stripsize = nel1/(groups*threads); - if (stripsize < 2) { + int dir, els, mul; + int s1 = nel1/(groups*threads); + int s2 = nel2/(groups*threads); + int s3 = nel3/(groups*threads); + int r1 = nel1-(s1*groups*threads); + int r2 = nel2-(s2*groups*threads); + int r3 = nel3-(s3*groups*threads); + if (r1*nel2*nel3 < r2*nel1*nel2 && r1*nel2*nel3 < r3*nel1*nel2 ) { + // strips along x axis + stripsize = s1; + dir = 0; + els = nel1; + mul = 1; + } else if (r2*nel1*nel3 < r1*nel2*nel3 && r2*nel1*nel3 < r3*nel1*nel2 ) { + // strips along y axis + stripsize = s2; + els = nel2; + dir = 1; + mul = nel1; + } else { + // strips along z axis + stripsize = s3; + els = nel3; + dir = 2; + mul = nel1*nel2; + } + + if (stripsize < 2 && groups > 1) { std::cerr << __FUNCTION__ << ": Warning: too many threads available." << std::endl << "Reducing to a suitable amount" << std::endl; - while ((stripsize = nel1/(groups*threads)) < 2 && threads > 1) + while (((stripsize = els/(groups*threads)) < 2) && threads > 1) threads--; if (threads == 1) groups=1; - stripsize = nel1/(groups*threads); + stripsize = els/(groups*threads); } - remainder = nel1-(stripsize*groups*threads); + remainder = els-(stripsize*groups*threads); #endif result.resize(groups); @@ -396,22 +458,49 @@ void utl::calcThreadGroups(int nel1, int nel2, int nel3, ThreadGroups& result) std::cout << "nel2 " << nel2 << std::endl; std::cout << "nel3 " << nel3 << std::endl; std::cout << "stripsize " << stripsize << std::endl; - std::cout << "# of strips " << (stripsize?nel1/stripsize:0) << std::endl; + std::cout << "# of strips " << els/stripsize << std::endl; std::cout << "remainder " << remainder << std::endl; #endif - for (size_t g=0;g > stripsizes; + stripsizes.resize(2); + stripsizes[0].resize(threads,stripsize); + stripsizes[1].resize(threads,stripsize); + int r=0; + for (int i=0;i > startelms; + startelms.resize(2); + int offs=0; + for (int i=0;i