[BENCHMARK_APP] Align pc reports, revert format print to previous, excel friendly (#14644)

* [Python] Align pc output in Python benchmark, update sorted output to new headers, print floats with 6-digit precision, fix date format printing to excel-friendly

* [PYTHON/C++] Change precision of raports and pc counters report, simplify code

* [C++] Align C++ benchamrk_app report to Python's by setting the precision value to 3

* [C++] Fix CsvDumper precision

* [C++/PYTHON] Align pcand pcsort outputs, merge C++ printSort methods into one

* Update samples/cpp/common/utils/include/samples/common.hpp

Co-authored-by: Zlobin Vladimir <vladimir.zlobin@intel.com>

* Update samples/cpp/common/utils/include/samples/common.hpp

Co-authored-by: Zlobin Vladimir <vladimir.zlobin@intel.com>

Co-authored-by: Michal Lukaszewski <michal.lukaszewski@intel.com>
Co-authored-by: Zlobin Vladimir <vladimir.zlobin@intel.com>
This commit is contained in:
Piotr Krzemiński 2023-01-11 09:38:37 +01:00 committed by GitHub
parent c7a1468fb7
commit f4ff1cbdf3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 159 additions and 242 deletions

View File

@ -22,7 +22,7 @@ void StatisticsReport::add_parameters(const Category& category, const Parameters
} }
void StatisticsReport::dump() { void StatisticsReport::dump() {
CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_report.csv"); CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_report.csv", 3);
auto dump_parameters = [&dumper](const Parameters& parameters) { auto dump_parameters = [&dumper](const Parameters& parameters) {
for (auto& parameter : parameters) { for (auto& parameter : parameters) {
@ -88,7 +88,7 @@ void StatisticsReport::dump_performance_counters_request(CsvDumper& dumper, cons
? status_names[(int)layer.status] ? status_names[(int)layer.status]
: "INVALID_STATUS"); : "INVALID_STATUS");
dumper << layer.node_type << layer.exec_type; dumper << layer.node_type << layer.exec_type;
dumper << std::to_string(layer.real_time.count() / 1000.0) << std::to_string(layer.cpu_time.count() / 1000.0); dumper << layer.real_time.count() / 1000.0 << layer.cpu_time.count() / 1000.0;
total += layer.real_time; total += layer.real_time;
total_cpu += layer.cpu_time; total_cpu += layer.cpu_time;
dumper.endLine(); dumper.endLine();
@ -135,8 +135,7 @@ void StatisticsReport::dump_sort_performance_counters_request(CsvDumper& dumper,
? status_names[(int)layer.status] ? status_names[(int)layer.status]
: "INVALID_STATUS"); : "INVALID_STATUS");
dumper << layer.node_type << layer.exec_type; dumper << layer.node_type << layer.exec_type;
dumper << std::to_string(layer.real_time.count() / 1000.0) dumper << layer.real_time.count() / 1000.0 << layer.cpu_time.count() / 1000.0;
<< std::to_string(layer.cpu_time.count() / 1000.0);
dumper << (layer.real_time * 1.0 / total) * 100; dumper << (layer.real_time * 1.0 / total) * 100;
dumper.endLine(); dumper.endLine();
layersize += 1; layersize += 1;
@ -191,7 +190,7 @@ void StatisticsReport::dump_performance_counters(const std::vector<PerformanceCo
slog::info << "Performance counters are empty. No reports are dumped." << slog::endl; slog::info << "Performance counters are empty. No reports are dumped." << slog::endl;
return; return;
} }
CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_" + _config.report_type + "_report.csv"); CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_" + _config.report_type + "_report.csv", 3);
if (_config.report_type == detailedCntReport) { if (_config.report_type == detailedCntReport) {
for (auto& pc : perfCounts) { for (auto& pc : perfCounts) {
dump_performance_counters_request(dumper, pc); dump_performance_counters_request(dumper, pc);

View File

@ -1001,57 +1001,78 @@ inline std::string getFullDeviceName(ov::Core& core, std::string device) {
static UNUSED void printPerformanceCounts(std::vector<ov::ProfilingInfo> performanceData, static UNUSED void printPerformanceCounts(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream, std::ostream& stream,
std::string deviceName, std::string deviceName,
bool bshowHeader = true) { bool bshowHeader = true,
int precision = 3) {
std::chrono::microseconds totalTime = std::chrono::microseconds::zero(); std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
std::chrono::microseconds totalTimeCpu = std::chrono::microseconds::zero();
// Print performance counts // Print performance counts
if (bshowHeader) { if (bshowHeader) {
stream << std::endl << "performance counts:" << std::endl << std::endl; stream << std::endl << "Performance counts:" << std::endl << std::endl;
} }
std::ios::fmtflags fmt(std::cout.flags()); std::ios::fmtflags fmt(std::cout.flags());
stream << std::fixed << std::setprecision(precision);
for (const auto& it : performanceData) { for (const auto& it : performanceData) {
std::string toPrint(it.node_name);
const int maxLayerName = 30;
if (it.node_name.length() >= maxLayerName) {
toPrint = it.node_name.substr(0, maxLayerName - 5);
toPrint += "...";
}
stream << std::setw(maxLayerName) << std::left << toPrint << " ";
switch (it.status) {
case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(15) << std::left << "EXECUTED ";
break;
case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(15) << std::left << "NOT_RUN ";
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
stream << std::setw(25) << std::left << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
stream << std::setw(25) << std::left << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
stream << std::endl;
if (it.real_time.count() > 0) { if (it.real_time.count() > 0) {
totalTime += it.real_time; totalTime += it.real_time;
} }
if (it.cpu_time.count() > 0) {
totalTimeCpu += it.cpu_time;
}
std::string toPrint(it.node_name);
const int maxPrintLength = 20;
if (it.node_name.length() >= maxPrintLength) {
toPrint = it.node_name.substr(0, maxPrintLength - 5);
toPrint += "...";
}
stream << std::setw(maxPrintLength) << std::left << toPrint << " ";
switch (it.status) {
case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(21) << std::left << "EXECUTED ";
break;
case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(21) << std::left << "NOT_RUN ";
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(21) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << "layerType: ";
if (it.node_type.length() >= maxPrintLength) {
stream << std::setw(maxPrintLength) << std::left << it.node_type.substr(0, maxPrintLength - 3) + "..."
<< " ";
} else {
stream << std::setw(maxPrintLength) << std::left << it.node_type << " ";
}
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) << " ";
stream << "realTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
<< it.real_time.count() / 1000.0 << " ";
stream << "cpuTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
<< it.cpu_time.count() / 1000.0 << " ";
stream << std::endl;
} }
stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0) stream << std::setw(25) << std::left << "Total time: " << std::fixed << std::setprecision(3)
<< " milliseconds" << std::endl; << totalTime.count() / 1000.0 << " milliseconds" << std::endl;
std::cout << std::endl; stream << std::setw(25) << std::left << "Total CPU time: " << std::fixed << std::setprecision(3)
std::cout << "Full device name: " << deviceName << std::endl; << totalTimeCpu.count() / 1000.0 << " milliseconds" << std::endl;
std::cout << std::endl; stream << std::endl;
stream << "Full device name: " << deviceName << std::endl;
stream << std::endl;
std::cout.flags(fmt); std::cout.flags(fmt);
} }
static UNUSED void printPerformanceCounts(ov::InferRequest request, static UNUSED void printPerformanceCounts(ov::InferRequest request,
std::ostream& stream, std::ostream& stream,
std::string deviceName, std::string deviceName,
bool bshowHeader = true) { bool bshowHeader = true,
int precision = 3) {
auto performanceMap = request.get_profiling_info(); auto performanceMap = request.get_profiling_info();
printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader); printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader, precision);
} }
static inline std::string double_to_string(const double number) { static inline std::string double_to_string(const double number) {
@ -1124,183 +1145,78 @@ static inline void fill_tensor_random(ov::Tensor tensor) {
} }
} }
static UNUSED void printPerformanceCountsNoSort(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream,
std::string deviceName,
bool bshowHeader = true) {
std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
// Print performance counts
if (bshowHeader) {
stream << std::endl << "performance counts:" << std::endl << std::endl;
}
std::ios::fmtflags fmt(std::cout.flags());
for (const auto& it : performanceData) {
if (it.real_time.count() > 0) {
totalTime += it.real_time;
}
}
if (totalTime.count() != 0) {
for (const auto& it : performanceData) {
std::string toPrint(it.node_name);
const int maxLayerName = 30;
if (it.node_name.length() >= maxLayerName) {
toPrint = it.node_name.substr(0, maxLayerName - 5);
toPrint += "...";
}
stream << std::setw(maxLayerName) << std::left << toPrint << " ";
switch (it.status) {
case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(15) << std::left << "EXECUTED ";
break;
case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(15) << std::left << "NOT_RUN ";
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
stream << std::setw(25) << std::left
<< "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
stream << std::setw(25) << std::left
<< "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
std::stringstream opt_proportion_ss;
opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion;
std::string opt_proportion_str = opt_proportion_ss.str();
if (opt_proportion_str == "0.00") {
opt_proportion_str = "N/A";
}
stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
stream << std::endl;
}
}
stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
<< " milliseconds" << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << deviceName << std::endl;
std::cout << std::endl;
std::cout.flags(fmt);
}
static UNUSED bool sort_pc_descend(const ov::ProfilingInfo& profiling1, const ov::ProfilingInfo& profiling2) { static UNUSED bool sort_pc_descend(const ov::ProfilingInfo& profiling1, const ov::ProfilingInfo& profiling2) {
return profiling1.real_time > profiling2.real_time; return profiling1.real_time > profiling2.real_time;
} }
static UNUSED void printPerformanceCountsDescendSort(std::vector<ov::ProfilingInfo> performanceData, static UNUSED void printPerformanceCountsSort(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream, std::ostream& stream,
std::string deviceName, std::string deviceName,
bool bshowHeader = true) { std::string sorttype,
bool bshowHeader = true,
int precision = 3) {
std::chrono::microseconds totalTime = std::chrono::microseconds::zero(); std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
std::chrono::microseconds totalTimeCpu = std::chrono::microseconds::zero();
// Print performance counts // Print performance counts
if (bshowHeader) { if (bshowHeader) {
stream << std::endl << "performance counts:" << std::endl << std::endl; stream << std::endl << "Performance counts:" << std::endl << std::endl;
} }
std::ios::fmtflags fmt(std::cout.flags()); std::ios::fmtflags fmt(std::cout.flags());
stream << std::fixed << std::setprecision(precision);
for (const auto& it : performanceData) { for (const auto& it : performanceData) {
if (it.real_time.count() > 0) { if (it.real_time.count() > 0) {
totalTime += it.real_time; totalTime += it.real_time;
} }
} if (it.cpu_time.count() > 0) {
if (totalTime.count() != 0) { totalTimeCpu += it.cpu_time;
// sort perfcounter
std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)};
std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend);
for (const auto& it : sortPerfCounts) {
std::string toPrint(it.node_name);
const int maxLayerName = 30;
if (it.node_name.length() >= maxLayerName) {
toPrint = it.node_name.substr(0, maxLayerName - 5);
toPrint += "...";
}
stream << std::setw(maxLayerName) << std::left << toPrint << " ";
switch (it.status) {
case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(15) << std::left << "EXECUTED ";
break;
case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(15) << std::left << "NOT_RUN ";
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
stream << std::setw(25) << std::left
<< "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
stream << std::setw(25) << std::left
<< "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
std::stringstream opt_proportion_ss;
opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion;
std::string opt_proportion_str = opt_proportion_ss.str();
if (opt_proportion_str == "0.00") {
opt_proportion_str = "N/A";
}
stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
stream << std::endl;
}
}
stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
<< " milliseconds" << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << deviceName << std::endl;
std::cout << std::endl;
std::cout.flags(fmt);
}
static UNUSED void printPerformanceCountsSimpleSort(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream,
std::string deviceName,
bool bshowHeader = true) {
std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
// Print performance counts
if (bshowHeader) {
stream << std::endl << "performance counts:" << std::endl << std::endl;
}
std::ios::fmtflags fmt(std::cout.flags());
for (const auto& it : performanceData) {
if (it.real_time.count() > 0) {
totalTime += it.real_time;
} }
} }
if (totalTime.count() != 0) { if (totalTime.count() != 0) {
// sort perfcounter
std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)}; std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)};
std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend); if (sorttype == pcSort || sorttype == pcSimpleSort) {
std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend);
}
for (const auto& it : sortPerfCounts) { for (const auto& it : sortPerfCounts) {
if (it.status == ov::ProfilingInfo::Status::EXECUTED) { if ((sorttype == pcSimpleSort && it.status == ov::ProfilingInfo::Status::EXECUTED) ||
sorttype != pcSimpleSort) {
std::string toPrint(it.node_name); std::string toPrint(it.node_name);
const int maxLayerName = 30; const int maxPrintLength = 20;
if (it.node_name.length() >= maxLayerName) { if (it.node_name.length() >= maxPrintLength) {
toPrint = it.node_name.substr(0, maxLayerName - 5); toPrint = it.node_name.substr(0, maxPrintLength - 5);
toPrint += "..."; toPrint += "...";
} }
stream << std::setw(maxLayerName) << std::left << toPrint << " "; stream << std::setw(maxPrintLength) << std::left << toPrint << " ";
stream << std::setw(15) << std::left << "EXECUTED "; switch (it.status) {
stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " "; case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " "; stream << std::setw(21) << std::left << "EXECUTED ";
stream << std::setw(25) << std::left break;
<< "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " "; case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(25) << std::left stream << std::setw(21) << std::left << "NOT_RUN ";
<< "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " "; break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(21) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << "layerType: ";
if (it.node_type.length() >= maxPrintLength) {
stream << std::setw(maxPrintLength) << std::left
<< it.node_type.substr(0, maxPrintLength - 3) + "..."
<< " ";
} else {
stream << std::setw(maxPrintLength) << std::left << it.node_type << " ";
}
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) << " ";
stream << "realTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
<< it.real_time.count() / 1000.0 << " ";
stream << "cpuTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
<< it.cpu_time.count() / 1000.0 << " ";
double opt_proportion = it.real_time.count() * 100.0 / totalTime.count(); double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
std::stringstream opt_proportion_ss; std::stringstream opt_proportion_ss;
@ -1310,29 +1226,14 @@ static UNUSED void printPerformanceCountsSimpleSort(std::vector<ov::ProfilingInf
opt_proportion_str = "N/A"; opt_proportion_str = "N/A";
} }
stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%"; stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
stream << std::endl; stream << std::endl;
} }
} }
} }
stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0) stream << std::setw(25) << std::left << "Total time: " << totalTime.count() / 1000.0 << " milliseconds"
<< " milliseconds" << std::endl; << std::endl;
std::cout << std::endl; std::cout << std::endl;
std::cout << "Full device name: " << deviceName << std::endl; std::cout << "Full device name: " << deviceName << std::endl;
std::cout << std::endl; std::cout << std::endl;
std::cout.flags(fmt); std::cout.flags(fmt);
} }
static UNUSED void printPerformanceCountsSort(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream,
std::string deviceName,
std::string sorttype,
bool bshowHeader = true) {
if (sorttype == pcNoSort) {
printPerformanceCountsNoSort(performanceData, stream, deviceName, bshowHeader);
} else if (sorttype == pcSort) {
printPerformanceCountsDescendSort(performanceData, stream, deviceName, bshowHeader);
} else if (sorttype == pcSimpleSort) {
printPerformanceCountsSimpleSort(performanceData, stream, deviceName, bshowHeader);
}
}

View File

@ -6,6 +6,7 @@
#include <ctime> #include <ctime>
#include <fstream> #include <fstream>
#include <iosfwd>
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <string> #include <string>
@ -35,8 +36,9 @@ public:
* @brief A constructor. Disables dumping in case dump file cannot be created * @brief A constructor. Disables dumping in case dump file cannot be created
* @param enabled - True if dumping is enabled by default. * @param enabled - True if dumping is enabled by default.
* @param name - name of file to dump to. File won't be created if first parameter is false. * @param name - name of file to dump to. File won't be created if first parameter is false.
* @param precision - floating point numbers' decimal places to print.
*/ */
explicit CsvDumper(bool enabled = true, const std::string& name = "") : canDump(enabled) { explicit CsvDumper(bool enabled = true, const std::string& name = "", const int precision = 3) : canDump(enabled) {
if (!canDump) { if (!canDump) {
return; return;
} }
@ -45,6 +47,8 @@ public:
if (!file) { if (!file) {
slog::warn << "Cannot create dump file! Disabling dump." << slog::endl; slog::warn << "Cannot create dump file! Disabling dump." << slog::endl;
canDump = false; canDump = false;
} else {
setPrecision(precision);
} }
} }
@ -57,6 +61,18 @@ public:
delimiter = c; delimiter = c;
} }
/**
* @brief Sets a precision used to print floating point values
* @param precision - Decimal places to print
* @return
*/
void setPrecision(int precision) {
if (canDump) {
file.precision(precision);
file.setf(std::ios::fixed);
}
}
/** /**
* @brief Overloads operator to organize streaming values to file. Does nothing if dumping is * @brief Overloads operator to organize streaming values to file. Does nothing if dumping is
* disabled Adds delimiter at the end of value provided * disabled Adds delimiter at the end of value provided

View File

@ -65,17 +65,17 @@ class StatisticsReport:
f.write(self.csv_separator.join(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)\n'])) f.write(self.csv_separator.join(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)\n']))
for pi in prof_info: for pi in prof_info:
f.write(self.csv_separator.join([pi.node_name, str(pi.status), pi.node_type, pi.exec_type, f.write(self.csv_separator.join([pi.node_name, str(pi.status), pi.node_type, pi.exec_type,
str((pi.real_time // timedelta(microseconds=1))/1000.0), f"{pi.real_time / timedelta(milliseconds=1):.3f}",
str((pi.cpu_time // timedelta(microseconds=1))/1000.0)])) f"{pi.cpu_time / timedelta(milliseconds=1):.3f}"]))
f.write('\n') f.write('\n')
total += pi.real_time total += pi.real_time
total_cpu += pi.cpu_time total_cpu += pi.cpu_time
f.write(self.csv_separator.join(['Total','','','', f.write(self.csv_separator.join(['Total', '', '', '',
str((total // timedelta(microseconds=1))/1000.0), f"{total / timedelta(milliseconds=1):.3f}",
str((total_cpu // timedelta(microseconds=1))/1000.0)])) f"{total_cpu / timedelta(milliseconds=1):.3f}"]))
f.write('\n\n') f.write('\n\n')
def dump_performance_counters_sorted(self,prof_sorted_info): def dump_performance_counters_sorted(self, prof_sorted_info):
"""Save sorted performance counters into csv file. """Save sorted performance counters into csv file.
""" """
filename = os.path.join(self.config.report_folder, f'benchmark_sorted_report.csv') filename = os.path.join(self.config.report_folder, f'benchmark_sorted_report.csv')
@ -83,16 +83,18 @@ class StatisticsReport:
total_cpu = 0 total_cpu = 0
with open(filename, 'w') as f: with open(filename, 'w') as f:
writer = csv.writer(f) writer = csv.writer(f)
writer.writerow(['LayerName', 'ExecStatus', 'LayerType', 'ExecType', 'RealTime (ms)', 'CPUTime (ms)' , 'Proportion(%) \n']) writer.writerow(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)' , 'proportion (%)\n'])
for tmp_prof in prof_sorted_info: for tmp_prof in prof_sorted_info:
writer.writerow([tmp_prof[0], str(tmp_prof[1]), writer.writerow([tmp_prof[0], str(tmp_prof[1]),
tmp_prof[2], tmp_prof[6], tmp_prof[2], tmp_prof[6],
str(tmp_prof[3]), str(tmp_prof[4]), f"{tmp_prof[3] / 1000:.3f}", # Divide by 1000
f"{tmp_prof[4] / 1000:.3f}",
str("%.2f"%(tmp_prof[5]*100))+"%"]) str("%.2f"%(tmp_prof[5]*100))+"%"])
total += tmp_prof[3] total += tmp_prof[3]
total_cpu += tmp_prof[4] total_cpu += tmp_prof[4]
f.write('\n') f.write('\n')
writer.writerow(["Total time cost: RealTime %.2fms , CPUTime %.2fms"%(total,total_cpu)]) writer.writerow(["Total time: %.2f milliseconds"%(total / 1000)])
writer.writerow(["Total CPU time: %.2f milliseconds"%(total_cpu / 1000)])
f.write('\n\n') f.write('\n\n')
logger.info(f'Sorted performance counters report is stored to {filename}') logger.info(f'Sorted performance counters report is stored to {filename}')

View File

@ -74,11 +74,9 @@ def get_element_type(precision):
def fuse_mean_scale(preproc: PrePostProcessor, app_inputs_info): def fuse_mean_scale(preproc: PrePostProcessor, app_inputs_info):
# TODO: remove warning after 23.3 release # TODO: remove warning after 23.3 release
warned = False; warned = False
warn_msg = 'Mean/scale values are fused into the model. This slows down performance compared to --imean and --iscale which existed before' warn_msg = 'Mean/scale values are fused into the model. This slows down performance compared to --imean and --iscale which existed before'
for input_info in app_inputs_info: for input_info in app_inputs_info:
print(input_info)
print(input_info.mean)
if input_info.mean.size: if input_info.mean.size:
if not warned: if not warned:
logger.warning(warn_msg) logger.warning(warn_msg)
@ -399,51 +397,52 @@ def print_perf_counters_sort(perf_counts_list,sort_flag="sort"):
total_detail_data = sorted(total_detail_data,key=lambda tmp_data:tmp_data[-4],reverse=True) total_detail_data = sorted(total_detail_data,key=lambda tmp_data:tmp_data[-4],reverse=True)
total_detail_data = [tmp_data for tmp_data in total_detail_data if str(tmp_data[1])!="Status.NOT_RUN"] total_detail_data = [tmp_data for tmp_data in total_detail_data if str(tmp_data[1])!="Status.NOT_RUN"]
print_detail_result(total_detail_data) print_detail_result(total_detail_data)
print(f'Total time: {total_time} microseconds') print(f'Total time: {total_time / 1000:.3f} milliseconds')
print(f'Total CPU time: {total_time_cpu} microseconds') print(f'Total CPU time: {total_time_cpu / 1000:.3f} milliseconds')
print(f'Total proportion: {"%.2f"%(round(total_real_time_proportion)*100)} % \n') print(f'Total proportion: {"%.2f"%(round(total_real_time_proportion)*100)} % \n')
return total_detail_data return total_detail_data
def print_detail_result(result_list): def print_detail_result(result_list):
""" Print_perf_counters_sort result """ Print_perf_counters_sort result
""" """
max_layer_name = 30 max_print_length = 20
for tmp_result in result_list: for tmp_result in result_list:
node_name = tmp_result[0] node_name = tmp_result[0]
layerStatus = tmp_result[1] layerStatus = tmp_result[1]
layerType = tmp_result[2] layerType = tmp_result[2]
real_time = tmp_result[3] real_time = tmp_result[3]
cpu_time = tmp_result[4] cpu_time = tmp_result[4]
real_proportion = "%.2f"%(tmp_result[5]*100) real_proportion = "%.2f" % (tmp_result[5] * 100)
if real_proportion == "0.00": if real_proportion == "0.00":
real_proportion = "N/A" real_proportion = "N/A"
execType = tmp_result[6] execType = tmp_result[6]
print(f"{node_name[:max_layer_name - 4] + '...' if (len(node_name) >= max_layer_name) else node_name:<30}" print(f"{node_name[:max_print_length - 4] + '...' if (len(node_name) >= max_print_length) else node_name:<20} "
f"{str(layerStatus):<20}" f"{str(layerStatus):<20} "
f"{'layerType: ' + layerType:<30}" f"layerType: {layerType[:max_print_length - 4] + '...' if (len(layerType) >= max_print_length) else layerType:<20} "
f"{'realTime: ' + str(real_time):<20}" f"execType: {execType:<20} "
f"{'cpu: ' + str(cpu_time):<15}" f"realTime (ms): {real_time / timedelta(milliseconds=1):<10.3f} "
f"{'proportion: '+ str(real_proportion)+'%':<20}" f"cpuTime (ms): {cpu_time / timedelta(milliseconds=1):<10.3f}"
f"{'execType: ' + execType:<20}") f"proportion: {str(real_proportion +'%'):<8}")
def print_perf_counters(perf_counts_list): def print_perf_counters(perf_counts_list):
max_layer_name = 30 max_print_length = 20
for ni in range(len(perf_counts_list)): for ni in range(len(perf_counts_list)):
perf_counts = perf_counts_list[ni] perf_counts = perf_counts_list[ni]
total_time = timedelta() total_time = timedelta()
total_time_cpu = timedelta() total_time_cpu = timedelta()
logger.info(f"Performance counts for {ni}-th infer request") logger.info(f"Performance counts for {ni}-th infer request")
for pi in perf_counts: for pi in perf_counts:
print(f"{pi.node_name[:max_layer_name - 4] + '...' if (len(pi.node_name) >= max_layer_name) else pi.node_name:<30}" print(f"{pi.node_name[:max_print_length - 4] + '...' if (len(pi.node_name) >= max_print_length) else pi.node_name:<20} "
f"{str(pi.status):<15}" f"{str(pi.status):<20} "
f"{'layerType: ' + pi.node_type:<30}" f"layerType: {pi.node_type[:max_print_length - 4] + '...' if (len(pi.node_type) >= max_print_length) else pi.node_type:<20} "
f"{'realTime: ' + str((pi.real_time // timedelta(microseconds=1)) / 1000.0):<20}" f"execType: {pi.exec_type:<20} "
f"{'cpu: ' + str((pi.cpu_time // timedelta(microseconds=1)) / 1000.0):<20}" f"realTime (ms): {pi.real_time / timedelta(milliseconds=1):<10.3f} "
f"{'execType: ' + pi.exec_type:<20}") f"cpuTime (ms): {pi.cpu_time / timedelta(milliseconds=1):<10.3f}")
total_time += pi.real_time total_time += pi.real_time
total_time_cpu += pi.cpu_time total_time_cpu += pi.cpu_time
print(f'Total time: {(total_time // timedelta(microseconds=1)) / 1000.0} seconds') print(f'Total time: {total_time / timedelta(milliseconds=1)} milliseconds')
print(f'Total CPU time: {(total_time_cpu // timedelta(microseconds=1)) / 1000.0} seconds\n') print(f'Total CPU time: {total_time_cpu / timedelta(milliseconds=1)} milliseconds\n')
def get_command_line_arguments(argv): def get_command_line_arguments(argv):