[BENCHMARK_APP] Align pc reports, revert format print to previous, excel friendly (#14644)

* [Python] Align pc output in Python benchmark, update sorted output to new headers, print floats with 6-digit precision, fix date format printing to excel-friendly

* [PYTHON/C++] Change precision of reports and pc counters report, simplify code

* [C++] Align C++ benchmark_app report to Python's by setting the precision value to 3

* [C++] Fix CsvDumper precision

* [C++/PYTHON] Align pc and pcsort outputs, merge C++ printSort methods into one

* Update samples/cpp/common/utils/include/samples/common.hpp

Co-authored-by: Zlobin Vladimir <vladimir.zlobin@intel.com>

* Update samples/cpp/common/utils/include/samples/common.hpp

Co-authored-by: Zlobin Vladimir <vladimir.zlobin@intel.com>

Co-authored-by: Michal Lukaszewski <michal.lukaszewski@intel.com>
Co-authored-by: Zlobin Vladimir <vladimir.zlobin@intel.com>
This commit is contained in:
Piotr Krzemiński
2023-01-11 09:38:37 +01:00
committed by GitHub
parent c7a1468fb7
commit f4ff1cbdf3
5 changed files with 159 additions and 242 deletions

View File

@@ -22,7 +22,7 @@ void StatisticsReport::add_parameters(const Category& category, const Parameters
}
void StatisticsReport::dump() {
CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_report.csv");
CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_report.csv", 3);
auto dump_parameters = [&dumper](const Parameters& parameters) {
for (auto& parameter : parameters) {
@@ -88,7 +88,7 @@ void StatisticsReport::dump_performance_counters_request(CsvDumper& dumper, cons
? status_names[(int)layer.status]
: "INVALID_STATUS");
dumper << layer.node_type << layer.exec_type;
dumper << std::to_string(layer.real_time.count() / 1000.0) << std::to_string(layer.cpu_time.count() / 1000.0);
dumper << layer.real_time.count() / 1000.0 << layer.cpu_time.count() / 1000.0;
total += layer.real_time;
total_cpu += layer.cpu_time;
dumper.endLine();
@@ -135,8 +135,7 @@ void StatisticsReport::dump_sort_performance_counters_request(CsvDumper& dumper,
? status_names[(int)layer.status]
: "INVALID_STATUS");
dumper << layer.node_type << layer.exec_type;
dumper << std::to_string(layer.real_time.count() / 1000.0)
<< std::to_string(layer.cpu_time.count() / 1000.0);
dumper << layer.real_time.count() / 1000.0 << layer.cpu_time.count() / 1000.0;
dumper << (layer.real_time * 1.0 / total) * 100;
dumper.endLine();
layersize += 1;
@@ -191,7 +190,7 @@ void StatisticsReport::dump_performance_counters(const std::vector<PerformanceCo
slog::info << "Performance counters are empty. No reports are dumped." << slog::endl;
return;
}
CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_" + _config.report_type + "_report.csv");
CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_" + _config.report_type + "_report.csv", 3);
if (_config.report_type == detailedCntReport) {
for (auto& pc : perfCounts) {
dump_performance_counters_request(dumper, pc);

View File

@@ -1001,57 +1001,78 @@ inline std::string getFullDeviceName(ov::Core& core, std::string device) {
static UNUSED void printPerformanceCounts(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream,
std::string deviceName,
bool bshowHeader = true) {
bool bshowHeader = true,
int precision = 3) {
std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
std::chrono::microseconds totalTimeCpu = std::chrono::microseconds::zero();
// Print performance counts
if (bshowHeader) {
stream << std::endl << "performance counts:" << std::endl << std::endl;
stream << std::endl << "Performance counts:" << std::endl << std::endl;
}
std::ios::fmtflags fmt(std::cout.flags());
stream << std::fixed << std::setprecision(precision);
for (const auto& it : performanceData) {
std::string toPrint(it.node_name);
const int maxLayerName = 30;
if (it.node_name.length() >= maxLayerName) {
toPrint = it.node_name.substr(0, maxLayerName - 5);
toPrint += "...";
}
stream << std::setw(maxLayerName) << std::left << toPrint << " ";
switch (it.status) {
case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(15) << std::left << "EXECUTED ";
break;
case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(15) << std::left << "NOT_RUN ";
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
stream << std::setw(25) << std::left << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
stream << std::setw(25) << std::left << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
stream << std::endl;
if (it.real_time.count() > 0) {
totalTime += it.real_time;
}
if (it.cpu_time.count() > 0) {
totalTimeCpu += it.cpu_time;
}
std::string toPrint(it.node_name);
const int maxPrintLength = 20;
if (it.node_name.length() >= maxPrintLength) {
toPrint = it.node_name.substr(0, maxPrintLength - 5);
toPrint += "...";
}
stream << std::setw(maxPrintLength) << std::left << toPrint << " ";
switch (it.status) {
case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(21) << std::left << "EXECUTED ";
break;
case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(21) << std::left << "NOT_RUN ";
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(21) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << "layerType: ";
if (it.node_type.length() >= maxPrintLength) {
stream << std::setw(maxPrintLength) << std::left << it.node_type.substr(0, maxPrintLength - 3) + "..."
<< " ";
} else {
stream << std::setw(maxPrintLength) << std::left << it.node_type << " ";
}
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) << " ";
stream << "realTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
<< it.real_time.count() / 1000.0 << " ";
stream << "cpuTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
<< it.cpu_time.count() / 1000.0 << " ";
stream << std::endl;
}
stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
<< " milliseconds" << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << deviceName << std::endl;
std::cout << std::endl;
stream << std::setw(25) << std::left << "Total time: " << std::fixed << std::setprecision(3)
<< totalTime.count() / 1000.0 << " milliseconds" << std::endl;
stream << std::setw(25) << std::left << "Total CPU time: " << std::fixed << std::setprecision(3)
<< totalTimeCpu.count() / 1000.0 << " milliseconds" << std::endl;
stream << std::endl;
stream << "Full device name: " << deviceName << std::endl;
stream << std::endl;
std::cout.flags(fmt);
}
static UNUSED void printPerformanceCounts(ov::InferRequest request,
std::ostream& stream,
std::string deviceName,
bool bshowHeader = true) {
bool bshowHeader = true,
int precision = 3) {
auto performanceMap = request.get_profiling_info();
printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader);
printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader, precision);
}
static inline std::string double_to_string(const double number) {
@@ -1124,183 +1145,78 @@ static inline void fill_tensor_random(ov::Tensor tensor) {
}
}
/**
 * @brief Prints per-layer performance counters in their original order, together with each
 * layer's share of the summed real time.
 *
 * Layer rows are printed only when the summed positive real time is non-zero. Layer names of
 * 30 characters or more are cut to 25 characters followed by "...". A proportion that rounds
 * to "0.00" is reported as "N/A". All output, including the trailing device name, goes to
 * @p stream, and the stream's formatting flags are restored before returning.
 *
 * @param performanceData - profiling entries to print
 * @param stream - output stream that receives the report
 * @param deviceName - full device name printed after the total time
 * @param bshowHeader - when true, a "performance counts:" header is printed first
 */
static UNUSED void printPerformanceCountsNoSort(std::vector<ov::ProfilingInfo> performanceData,
                                                std::ostream& stream,
                                                std::string deviceName,
                                                bool bshowHeader = true) {
    // Remember the flags of the stream that is actually being formatted (not std::cout),
    // so that the std::left adjustments applied below do not leak to the caller.
    const std::ios::fmtflags fmt(stream.flags());

    std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
    // Print performance counts
    if (bshowHeader) {
        stream << std::endl << "performance counts:" << std::endl << std::endl;
    }

    // Only positive durations contribute to the total used as the proportion denominator.
    for (const auto& it : performanceData) {
        if (it.real_time.count() > 0) {
            totalTime += it.real_time;
        }
    }

    // A zero total would make every proportion a division by zero, so rows are skipped.
    if (totalTime.count() != 0) {
        for (const auto& it : performanceData) {
            std::string toPrint(it.node_name);
            const int maxLayerName = 30;

            if (it.node_name.length() >= maxLayerName) {
                toPrint = it.node_name.substr(0, maxLayerName - 5);
                toPrint += "...";
            }

            stream << std::setw(maxLayerName) << std::left << toPrint << " ";
            switch (it.status) {
            case ov::ProfilingInfo::Status::EXECUTED:
                stream << std::setw(15) << std::left << "EXECUTED ";
                break;
            case ov::ProfilingInfo::Status::NOT_RUN:
                stream << std::setw(15) << std::left << "NOT_RUN ";
                break;
            case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
                stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
                break;
            }
            stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
            stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
            // count() is in microseconds; dividing by 1000.0 reports milliseconds.
            stream << std::setw(25) << std::left
                   << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
            stream << std::setw(25) << std::left
                   << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";

            double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
            std::stringstream opt_proportion_ss;
            opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion;
            std::string opt_proportion_str = opt_proportion_ss.str();
            if (opt_proportion_str == "0.00") {
                opt_proportion_str = "N/A";
            }
            stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
            stream << std::endl;
        }
    }

    stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
           << " milliseconds" << std::endl;
    // The trailer belongs to the same report, so it is written to the caller's stream.
    stream << std::endl;
    stream << "Full device name: " << deviceName << std::endl;
    stream << std::endl;

    stream.flags(fmt);
}
/**
 * @brief Comparison predicate that orders profiling entries by real time, largest first.
 * @param profiling1 - left-hand entry
 * @param profiling2 - right-hand entry
 * @return true when profiling1.real_time is strictly greater than profiling2.real_time
 */
static UNUSED bool sort_pc_descend(const ov::ProfilingInfo& profiling1, const ov::ProfilingInfo& profiling2) {
    const auto& lhsTime = profiling1.real_time;
    const auto& rhsTime = profiling2.real_time;
    return rhsTime < lhsTime;
}
static UNUSED void printPerformanceCountsDescendSort(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream,
std::string deviceName,
bool bshowHeader = true) {
static UNUSED void printPerformanceCountsSort(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream,
std::string deviceName,
std::string sorttype,
bool bshowHeader = true,
int precision = 3) {
std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
std::chrono::microseconds totalTimeCpu = std::chrono::microseconds::zero();
// Print performance counts
if (bshowHeader) {
stream << std::endl << "performance counts:" << std::endl << std::endl;
stream << std::endl << "Performance counts:" << std::endl << std::endl;
}
std::ios::fmtflags fmt(std::cout.flags());
stream << std::fixed << std::setprecision(precision);
for (const auto& it : performanceData) {
if (it.real_time.count() > 0) {
totalTime += it.real_time;
}
}
if (totalTime.count() != 0) {
// sort perfcounter
std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)};
std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend);
for (const auto& it : sortPerfCounts) {
std::string toPrint(it.node_name);
const int maxLayerName = 30;
if (it.node_name.length() >= maxLayerName) {
toPrint = it.node_name.substr(0, maxLayerName - 5);
toPrint += "...";
}
stream << std::setw(maxLayerName) << std::left << toPrint << " ";
switch (it.status) {
case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(15) << std::left << "EXECUTED ";
break;
case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(15) << std::left << "NOT_RUN ";
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
stream << std::setw(25) << std::left
<< "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
stream << std::setw(25) << std::left
<< "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
std::stringstream opt_proportion_ss;
opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion;
std::string opt_proportion_str = opt_proportion_ss.str();
if (opt_proportion_str == "0.00") {
opt_proportion_str = "N/A";
}
stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
stream << std::endl;
}
}
stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
<< " milliseconds" << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << deviceName << std::endl;
std::cout << std::endl;
std::cout.flags(fmt);
}
static UNUSED void printPerformanceCountsSimpleSort(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream,
std::string deviceName,
bool bshowHeader = true) {
std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
// Print performance counts
if (bshowHeader) {
stream << std::endl << "performance counts:" << std::endl << std::endl;
}
std::ios::fmtflags fmt(std::cout.flags());
for (const auto& it : performanceData) {
if (it.real_time.count() > 0) {
totalTime += it.real_time;
if (it.cpu_time.count() > 0) {
totalTimeCpu += it.cpu_time;
}
}
if (totalTime.count() != 0) {
// sort perfcounter
std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)};
std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend);
if (sorttype == pcSort || sorttype == pcSimpleSort) {
std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend);
}
for (const auto& it : sortPerfCounts) {
if (it.status == ov::ProfilingInfo::Status::EXECUTED) {
if ((sorttype == pcSimpleSort && it.status == ov::ProfilingInfo::Status::EXECUTED) ||
sorttype != pcSimpleSort) {
std::string toPrint(it.node_name);
const int maxLayerName = 30;
const int maxPrintLength = 20;
if (it.node_name.length() >= maxLayerName) {
toPrint = it.node_name.substr(0, maxLayerName - 5);
if (it.node_name.length() >= maxPrintLength) {
toPrint = it.node_name.substr(0, maxPrintLength - 5);
toPrint += "...";
}
stream << std::setw(maxLayerName) << std::left << toPrint << " ";
stream << std::setw(15) << std::left << "EXECUTED ";
stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
stream << std::setw(25) << std::left
<< "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
stream << std::setw(25) << std::left
<< "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
stream << std::setw(maxPrintLength) << std::left << toPrint << " ";
switch (it.status) {
case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(21) << std::left << "EXECUTED ";
break;
case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(21) << std::left << "NOT_RUN ";
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(21) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << "layerType: ";
if (it.node_type.length() >= maxPrintLength) {
stream << std::setw(maxPrintLength) << std::left
<< it.node_type.substr(0, maxPrintLength - 3) + "..."
<< " ";
} else {
stream << std::setw(maxPrintLength) << std::left << it.node_type << " ";
}
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) << " ";
stream << "realTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
<< it.real_time.count() / 1000.0 << " ";
stream << "cpuTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
<< it.cpu_time.count() / 1000.0 << " ";
double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
std::stringstream opt_proportion_ss;
@@ -1310,29 +1226,14 @@ static UNUSED void printPerformanceCountsSimpleSort(std::vector<ov::ProfilingInf
opt_proportion_str = "N/A";
}
stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
stream << std::endl;
}
}
}
stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
<< " milliseconds" << std::endl;
stream << std::setw(25) << std::left << "Total time: " << totalTime.count() / 1000.0 << " milliseconds"
<< std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << deviceName << std::endl;
std::cout << std::endl;
std::cout.flags(fmt);
}
/**
 * @brief Forwards to the performance-counter printer that matches the requested sort type.
 *
 * pcNoSort, pcSort and pcSimpleSort select the unsorted, descending-sorted and simple-sorted
 * printers respectively. Any other value of @p sorttype prints nothing.
 *
 * @param performanceData - profiling entries to print
 * @param stream - output stream handed to the selected printer
 * @param deviceName - device name handed to the selected printer
 * @param sorttype - sort mode selector
 * @param bshowHeader - forwarded header switch
 */
static UNUSED void printPerformanceCountsSort(std::vector<ov::ProfilingInfo> performanceData,
                                              std::ostream& stream,
                                              std::string deviceName,
                                              std::string sorttype,
                                              bool bshowHeader = true) {
    if (sorttype == pcNoSort) {
        printPerformanceCountsNoSort(performanceData, stream, deviceName, bshowHeader);
        return;
    }
    if (sorttype == pcSort) {
        printPerformanceCountsDescendSort(performanceData, stream, deviceName, bshowHeader);
        return;
    }
    if (sorttype == pcSimpleSort) {
        printPerformanceCountsSimpleSort(performanceData, stream, deviceName, bshowHeader);
    }
}

View File

@@ -6,6 +6,7 @@
#include <ctime>
#include <fstream>
#include <iosfwd>
#include <iostream>
#include <sstream>
#include <string>
@@ -35,8 +36,9 @@ public:
* @brief A constructor. Disables dumping in case dump file cannot be created
* @param enabled - True if dumping is enabled by default.
* @param name - name of file to dump to. File won't be created if first parameter is false.
* @param precision - floating point numbers' decimal places to print.
*/
explicit CsvDumper(bool enabled = true, const std::string& name = "") : canDump(enabled) {
explicit CsvDumper(bool enabled = true, const std::string& name = "", const int precision = 3) : canDump(enabled) {
if (!canDump) {
return;
}
@@ -45,6 +47,8 @@ public:
if (!file) {
slog::warn << "Cannot create dump file! Disabling dump." << slog::endl;
canDump = false;
} else {
setPrecision(precision);
}
}
@@ -57,6 +61,18 @@ public:
delimiter = c;
}
/**
 * @brief Sets the precision used to print floating point values and switches the dump file
 * to fixed-point notation. Does nothing if dumping is disabled.
 * @param precision - Decimal places to print
 */
void setPrecision(int precision) {
    if (!canDump) {
        return;
    }
    // Select fixed notation through the floatfield mask: setting the fixed bit alone would
    // leave a previously set scientific bit in place, and the two together mean hexfloat.
    file.setf(std::ios::fixed, std::ios::floatfield);
    file.precision(precision);
}
/**
* @brief Overloads operator to organize streaming values to file. Does nothing if dumping is
* disabled Adds delimiter at the end of value provided