Fixed: The profiler can now be used also inside multi-threading loops.

This commit is contained in:
Knut Morten Okstad
2015-12-09 18:17:43 +01:00
parent 4b2cde1154
commit 184b9c31e7
2 changed files with 72 additions and 29 deletions

View File

@@ -12,23 +12,26 @@
//==============================================================================
#include "Profiler.h"
#include "LinAlgInit.h"
#ifdef PARALLEL_PETSC
#include "petscsys.h"
#endif
#include <sys/time.h>
#include "LinAlgInit.h"
#ifdef USE_OPENMP
#include <omp.h>
#endif
Profiler* utl::profiler = 0;
Profiler* utl::profiler = nullptr;
Profiler::Profiler (const std::string& name) : myName(name), nRunners(0)
{
#ifdef USE_OPENMP
myMTimers.resize(omp_get_max_threads());
#endif
this->start("Total");
allCPU = allWall = 0.0;
@@ -36,6 +39,7 @@ Profiler::Profiler (const std::string& name) : myName(name), nRunners(0)
// Update pointer to current profiler (it should only be one at any time)
if (utl::profiler) delete utl::profiler;
utl::profiler = this;
LinAlgInit::increfs();
}
@@ -44,6 +48,7 @@ Profiler::~Profiler ()
{
this->stop("Total");
this->report(std::cout);
LinAlgInit::decrefs();
}
@@ -61,13 +66,28 @@ static double WallTime ()
}
//! \brief Returns the current thread ID when in a parallel loop, -1 otherwise.
static int iThread ()
{
#ifdef USE_OPENMP
if (omp_in_parallel())
return omp_get_thread_num();
#endif
return -1;
}
void Profiler::start (const std::string& funcName)
{
Profile& p = myTimers[funcName];
int tID = iThread();
Profile& p = tID < 0 ? myTimers[funcName] : myMTimers[tID][funcName];
if (p.running) return;
if (tID < 0)
nRunners++;
p.running = true;
nRunners++;
p.nCalls++;
p.startWall = WallTime();
p.startCPU = clock();
@@ -78,9 +98,11 @@ void Profiler::stop (const std::string& funcName)
{
clock_t stopCPU = clock();
double stopWall = WallTime();
int tID = iThread();
std::map<std::string,Profile>::iterator it = myTimers.find(funcName);
if (it == myTimers.end())
ProfileMap& timers = tID < 0 ? myTimers : myMTimers[tID];
ProfileMap::iterator it = timers.find(funcName);
if (it == timers.end())
std::cerr <<" *** No matching timer for "<< funcName << std::endl;
else if (it->second.running)
{
@@ -91,7 +113,7 @@ void Profiler::stop (const std::string& funcName)
p.running = false;
p.totalCPU += deltaCPU;
p.totalWall += deltaWall;
if (--nRunners == 1)
if (tID < 0 && --nRunners == 1)
{
// This is a "main" task, accumulate the total time for all main tasks
allCPU += deltaCPU;
@@ -132,7 +154,7 @@ std::ostream& operator<<(std::ostream& os, const Profiler::Profile& p)
else
os <<" |";
return os <<'\n';
return os;
}
@@ -141,8 +163,8 @@ void Profiler::report (std::ostream& os) const
if (myTimers.empty()) return;
use_ms = true; // Print mean times in microseconds by default
std::map<std::string,Profile>::const_iterator it;
for (it = myTimers.begin(); it != myTimers.end(); it++)
ProfileMap::const_iterator it;
for (it = myTimers.begin(); it != myTimers.end(); ++it)
{
// Make sure the task has stopped profiling (in case of exceptions)
if (it->second.running) const_cast<Profiler*>(this)->stop(it->first);
@@ -154,7 +176,7 @@ void Profiler::report (std::ostream& os) const
// Find the time for "other" tasks, i.e., the difference between
// the measured total time and the sum of all the measured tasks
Profile other;
std::map<std::string,Profile>::const_iterator tit = myTimers.find("Total");
ProfileMap::const_iterator tit = myTimers.find("Total");
if (tit != myTimers.end())
{
if (!tit->second.haveTime()) return; // Nothing to report, run in zero time
@@ -162,37 +184,54 @@ void Profiler::report (std::ostream& os) const
other.totalWall = tit->second.totalWall - allWall;
}
int myId = -1;
#ifdef PARALLEL_PETSC
MPI_Comm_rank(PETSC_COMM_WORLD,&myId);
#endif
// Print a table with timing results, all tasks with zero time are ommitted
const char* Ms = (use_ms ? "Mean(ms)" : "Mean(s) ");
os <<"\n==============================================================="
<<"\n=== Profiling results for "<< myName;
if (myId >= 0) os <<" on processor "<< myId;
#ifdef PARALLEL_PETSC
int myPid;
MPI_Comm_rank(PETSC_COMM_WORLD,&myPid);
os <<" on processor "<< myPid;
#endif
os <<"\n================================================================="
<<"\n | CPU time | Wall time |"
<<"\nTask | Total(s) "<<Ms<<"| Total(s) "<<Ms<<"| calls"
<<"\n----------------------+--------------------+--------------------+------"
<<"\n";
<<"\nTask | Total(s) "<<Ms<<"| Total(s) "<<Ms<<"| calls";
if (!myMTimers.empty()) os <<" | thread";
os <<"\n----------------------+--------------------+--------------------+------";
if (!myMTimers.empty()) os <<"-+-------";
os << std::endl;
os.precision(2);
os.flags(std::ios::fixed|std::ios::right);
for (it = myTimers.begin(); it != myTimers.end(); it++)
for (it = myTimers.begin(); it != myTimers.end(); ++it)
if (it != tit && it->second.haveTime())
{
if (it->first.size() >= 22)
os << it->first.substr(0,22) <<'|'<< it->second;
os << it->first.substr(0,22);
else
os << it->first << std::string(22-it->first.size(),' ')
<<'|'<< it->second;
os << it->first << std::string(22-it->first.size(),' ');
os <<'|'<< it->second << std::endl;
}
for (size_t i = 0; i < myMTimers.size(); i++)
for (it = myMTimers[i].begin(); it != myMTimers[i].end(); ++it)
if (it->second.haveTime())
{
if (it->first.size() >= 22)
os << it->first.substr(0,22);
else
os << it->first << std::string(22-it->first.size(),' ');
os <<'|'<< it->second <<" "<< i+1 << std::endl;
}
// Finally, print the "other" and "total" times
if (other.haveTime())
os <<"Other |"<< other;
if (tit != myTimers.end())
os <<"----------------------+--------------------+--------------------+------"
<<"\nTotal time |"<< tit->second;
os <<"======================================================================="
{
os <<"\n----------------------+--------------------+--------------------+------";
if (!myMTimers.empty()) os <<"-+-------";
os <<"\nTotal time |"<< tit->second;
}
os <<"\n================================================================="
<< std::endl;
}

View File

@@ -16,6 +16,7 @@
#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <ctime>
@@ -77,7 +78,10 @@ private:
std::string myName; //!< Name of this profiler
std::map<std::string,Profile> myTimers; //!< The task profiles with names
typedef std::map<std::string,Profile> ProfileMap; //!< Map of profilers
ProfileMap myTimers; //!< The task profiles with names
std::vector<ProfileMap> myMTimers; //!< Task profiles with names and thread iD
double allCPU; //!< Accumulated CPU time from all "main" tasks
double allWall; //!< Accumulated wall clock time of all "main" tasks