Fixed: The profiler can now be used also inside multi-threading loops.
This commit is contained in:
@@ -12,23 +12,26 @@
|
||||
//==============================================================================
|
||||
|
||||
#include "Profiler.h"
|
||||
#include "LinAlgInit.h"
|
||||
#ifdef PARALLEL_PETSC
|
||||
#include "petscsys.h"
|
||||
#endif
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "LinAlgInit.h"
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
|
||||
Profiler* utl::profiler = 0;
|
||||
Profiler* utl::profiler = nullptr;
|
||||
|
||||
|
||||
Profiler::Profiler (const std::string& name) : myName(name), nRunners(0)
|
||||
{
|
||||
#ifdef USE_OPENMP
|
||||
myMTimers.resize(omp_get_max_threads());
|
||||
#endif
|
||||
|
||||
this->start("Total");
|
||||
|
||||
allCPU = allWall = 0.0;
|
||||
@@ -36,6 +39,7 @@ Profiler::Profiler (const std::string& name) : myName(name), nRunners(0)
|
||||
// Update pointer to current profiler (it should only be one at any time)
|
||||
if (utl::profiler) delete utl::profiler;
|
||||
utl::profiler = this;
|
||||
|
||||
LinAlgInit::increfs();
|
||||
}
|
||||
|
||||
@@ -44,6 +48,7 @@ Profiler::~Profiler ()
|
||||
{
|
||||
this->stop("Total");
|
||||
this->report(std::cout);
|
||||
|
||||
LinAlgInit::decrefs();
|
||||
}
|
||||
|
||||
@@ -61,13 +66,28 @@ static double WallTime ()
|
||||
}
|
||||
|
||||
|
||||
//! \brief Returns the current thread ID when in a parallel loop, -1 otherwise.
|
||||
|
||||
static int iThread ()
|
||||
{
|
||||
#ifdef USE_OPENMP
|
||||
if (omp_in_parallel())
|
||||
return omp_get_thread_num();
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
void Profiler::start (const std::string& funcName)
|
||||
{
|
||||
Profile& p = myTimers[funcName];
|
||||
int tID = iThread();
|
||||
Profile& p = tID < 0 ? myTimers[funcName] : myMTimers[tID][funcName];
|
||||
if (p.running) return;
|
||||
|
||||
if (tID < 0)
|
||||
nRunners++;
|
||||
|
||||
p.running = true;
|
||||
nRunners++;
|
||||
p.nCalls++;
|
||||
p.startWall = WallTime();
|
||||
p.startCPU = clock();
|
||||
@@ -78,9 +98,11 @@ void Profiler::stop (const std::string& funcName)
|
||||
{
|
||||
clock_t stopCPU = clock();
|
||||
double stopWall = WallTime();
|
||||
int tID = iThread();
|
||||
|
||||
std::map<std::string,Profile>::iterator it = myTimers.find(funcName);
|
||||
if (it == myTimers.end())
|
||||
ProfileMap& timers = tID < 0 ? myTimers : myMTimers[tID];
|
||||
ProfileMap::iterator it = timers.find(funcName);
|
||||
if (it == timers.end())
|
||||
std::cerr <<" *** No matching timer for "<< funcName << std::endl;
|
||||
else if (it->second.running)
|
||||
{
|
||||
@@ -91,7 +113,7 @@ void Profiler::stop (const std::string& funcName)
|
||||
p.running = false;
|
||||
p.totalCPU += deltaCPU;
|
||||
p.totalWall += deltaWall;
|
||||
if (--nRunners == 1)
|
||||
if (tID < 0 && --nRunners == 1)
|
||||
{
|
||||
// This is a "main" task, accumulate the total time for all main tasks
|
||||
allCPU += deltaCPU;
|
||||
@@ -132,7 +154,7 @@ std::ostream& operator<<(std::ostream& os, const Profiler::Profile& p)
|
||||
else
|
||||
os <<" |";
|
||||
|
||||
return os <<'\n';
|
||||
return os;
|
||||
}
|
||||
|
||||
|
||||
@@ -141,8 +163,8 @@ void Profiler::report (std::ostream& os) const
|
||||
if (myTimers.empty()) return;
|
||||
|
||||
use_ms = true; // Print mean times in microseconds by default
|
||||
std::map<std::string,Profile>::const_iterator it;
|
||||
for (it = myTimers.begin(); it != myTimers.end(); it++)
|
||||
ProfileMap::const_iterator it;
|
||||
for (it = myTimers.begin(); it != myTimers.end(); ++it)
|
||||
{
|
||||
// Make sure the task has stopped profiling (in case of exceptions)
|
||||
if (it->second.running) const_cast<Profiler*>(this)->stop(it->first);
|
||||
@@ -154,7 +176,7 @@ void Profiler::report (std::ostream& os) const
|
||||
// Find the time for "other" tasks, i.e., the difference between
|
||||
// the measured total time and the sum of all the measured tasks
|
||||
Profile other;
|
||||
std::map<std::string,Profile>::const_iterator tit = myTimers.find("Total");
|
||||
ProfileMap::const_iterator tit = myTimers.find("Total");
|
||||
if (tit != myTimers.end())
|
||||
{
|
||||
if (!tit->second.haveTime()) return; // Nothing to report, run in zero time
|
||||
@@ -162,37 +184,54 @@ void Profiler::report (std::ostream& os) const
|
||||
other.totalWall = tit->second.totalWall - allWall;
|
||||
}
|
||||
|
||||
int myId = -1;
|
||||
#ifdef PARALLEL_PETSC
|
||||
MPI_Comm_rank(PETSC_COMM_WORLD,&myId);
|
||||
#endif
|
||||
|
||||
// Print a table with timing results, all tasks with zero time are ommitted
|
||||
const char* Ms = (use_ms ? "Mean(ms)" : "Mean(s) ");
|
||||
os <<"\n==============================================================="
|
||||
<<"\n=== Profiling results for "<< myName;
|
||||
if (myId >= 0) os <<" on processor "<< myId;
|
||||
#ifdef PARALLEL_PETSC
|
||||
int myPid;
|
||||
MPI_Comm_rank(PETSC_COMM_WORLD,&myPid);
|
||||
os <<" on processor "<< myPid;
|
||||
#endif
|
||||
os <<"\n================================================================="
|
||||
<<"\n | CPU time | Wall time |"
|
||||
<<"\nTask | Total(s) "<<Ms<<"| Total(s) "<<Ms<<"| calls"
|
||||
<<"\n----------------------+--------------------+--------------------+------"
|
||||
<<"\n";
|
||||
<<"\nTask | Total(s) "<<Ms<<"| Total(s) "<<Ms<<"| calls";
|
||||
if (!myMTimers.empty()) os <<" | thread";
|
||||
os <<"\n----------------------+--------------------+--------------------+------";
|
||||
if (!myMTimers.empty()) os <<"-+-------";
|
||||
os << std::endl;
|
||||
os.precision(2);
|
||||
os.flags(std::ios::fixed|std::ios::right);
|
||||
for (it = myTimers.begin(); it != myTimers.end(); it++)
|
||||
for (it = myTimers.begin(); it != myTimers.end(); ++it)
|
||||
if (it != tit && it->second.haveTime())
|
||||
{
|
||||
if (it->first.size() >= 22)
|
||||
os << it->first.substr(0,22) <<'|'<< it->second;
|
||||
os << it->first.substr(0,22);
|
||||
else
|
||||
os << it->first << std::string(22-it->first.size(),' ')
|
||||
<<'|'<< it->second;
|
||||
os << it->first << std::string(22-it->first.size(),' ');
|
||||
os <<'|'<< it->second << std::endl;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < myMTimers.size(); i++)
|
||||
for (it = myMTimers[i].begin(); it != myMTimers[i].end(); ++it)
|
||||
if (it->second.haveTime())
|
||||
{
|
||||
if (it->first.size() >= 22)
|
||||
os << it->first.substr(0,22);
|
||||
else
|
||||
os << it->first << std::string(22-it->first.size(),' ');
|
||||
os <<'|'<< it->second <<" "<< i+1 << std::endl;
|
||||
}
|
||||
|
||||
// Finally, print the "other" and "total" times
|
||||
if (other.haveTime())
|
||||
os <<"Other |"<< other;
|
||||
if (tit != myTimers.end())
|
||||
os <<"----------------------+--------------------+--------------------+------"
|
||||
<<"\nTotal time |"<< tit->second;
|
||||
os <<"======================================================================="
|
||||
{
|
||||
os <<"\n----------------------+--------------------+--------------------+------";
|
||||
if (!myMTimers.empty()) os <<"-+-------";
|
||||
os <<"\nTotal time |"<< tit->second;
|
||||
}
|
||||
os <<"\n================================================================="
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <ctime>
|
||||
|
||||
@@ -77,7 +78,10 @@ private:
|
||||
|
||||
std::string myName; //!< Name of this profiler
|
||||
|
||||
std::map<std::string,Profile> myTimers; //!< The task profiles with names
|
||||
typedef std::map<std::string,Profile> ProfileMap; //!< Map of profilers
|
||||
|
||||
ProfileMap myTimers; //!< The task profiles with names
|
||||
std::vector<ProfileMap> myMTimers; //!< Task profiles with names and thread iD
|
||||
|
||||
double allCPU; //!< Accumulated CPU time from all "main" tasks
|
||||
double allWall; //!< Accumulated wall clock time of all "main" tasks
|
||||
|
||||
Reference in New Issue
Block a user