[BENCHMARK_APP] Align pc reports, revert print format to the previous one, Excel-friendly output (#14644)

* [Python] Align pc output in the Python benchmark, update sorted output to the new headers, print floats with 6-digit precision, fix date format printing to be Excel-friendly

* [PYTHON/C++] Change the precision of reports and the pc counters report, simplify code

* [C++] Align the C++ benchmark_app report with Python's by setting the precision value to 3

* [C++] Fix CsvDumper precision

* [C++/PYTHON] Align pc and pcsort outputs, merge the C++ printSort methods into one

* Update samples/cpp/common/utils/include/samples/common.hpp

Co-authored-by: Zlobin Vladimir <vladimir.zlobin@intel.com>

* Update samples/cpp/common/utils/include/samples/common.hpp

Co-authored-by: Zlobin Vladimir <vladimir.zlobin@intel.com>

Co-authored-by: Michal Lukaszewski <michal.lukaszewski@intel.com>
Co-authored-by: Zlobin Vladimir <vladimir.zlobin@intel.com>
Piotr Krzemiński 2023-01-11 09:38:37 +01:00 committed by GitHub
parent c7a1468fb7
commit f4ff1cbdf3
5 changed files with 159 additions and 242 deletions


@@ -22,7 +22,7 @@ void StatisticsReport::add_parameters(const Category& category, const Parameters
}
void StatisticsReport::dump() {
CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_report.csv");
CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_report.csv", 3);
auto dump_parameters = [&dumper](const Parameters& parameters) {
for (auto& parameter : parameters) {
@@ -88,7 +88,7 @@ void StatisticsReport::dump_performance_counters_request(CsvDumper& dumper, cons
? status_names[(int)layer.status]
: "INVALID_STATUS");
dumper << layer.node_type << layer.exec_type;
dumper << std::to_string(layer.real_time.count() / 1000.0) << std::to_string(layer.cpu_time.count() / 1000.0);
dumper << layer.real_time.count() / 1000.0 << layer.cpu_time.count() / 1000.0;
total += layer.real_time;
total_cpu += layer.cpu_time;
dumper.endLine();
@@ -135,8 +135,7 @@ void StatisticsReport::dump_sort_performance_counters_request(CsvDumper& dumper,
? status_names[(int)layer.status]
: "INVALID_STATUS");
dumper << layer.node_type << layer.exec_type;
dumper << std::to_string(layer.real_time.count() / 1000.0)
<< std::to_string(layer.cpu_time.count() / 1000.0);
dumper << layer.real_time.count() / 1000.0 << layer.cpu_time.count() / 1000.0;
dumper << (layer.real_time * 1.0 / total) * 100;
dumper.endLine();
layersize += 1;
@@ -191,7 +190,7 @@ void StatisticsReport::dump_performance_counters(const std::vector<PerformanceCo
slog::info << "Performance counters are empty. No reports are dumped." << slog::endl;
return;
}
CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_" + _config.report_type + "_report.csv");
CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_" + _config.report_type + "_report.csv", 3);
if (_config.report_type == detailedCntReport) {
for (auto& pc : perfCounts) {
dump_performance_counters_request(dumper, pc);
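
The key change in this file is that the per-layer times are streamed into the dumper as plain doubles instead of being pre-converted with std::to_string(), so the dumper's precision setting can actually take effect. Below is a minimal, standalone sketch of the difference; the ostringstream simply stands in for CsvDumper's file stream, and the value is made up.

// Illustrative sketch, not part of this commit.
#include <iostream>
#include <sstream>
#include <string>

int main() {
    std::ostringstream out;            // stand-in for CsvDumper's file stream
    out.precision(3);                  // what CsvDumper(..., 3) now configures
    out.setf(std::ios::fixed);

    const double real_time_ms = 1234567 / 1000.0;   // microseconds -> milliseconds

    out << real_time_ms;                          // honours the stream precision: 1234.567
    out << ' ' << std::to_string(real_time_ms);   // ignores it, always 6 decimals: 1234.567000

    std::cout << out.str() << std::endl;
    return 0;
}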


@@ -1001,57 +1001,78 @@ inline std::string getFullDeviceName(ov::Core& core, std::string device) {
static UNUSED void printPerformanceCounts(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream,
std::string deviceName,
bool bshowHeader = true) {
bool bshowHeader = true,
int precision = 3) {
std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
std::chrono::microseconds totalTimeCpu = std::chrono::microseconds::zero();
// Print performance counts
if (bshowHeader) {
stream << std::endl << "performance counts:" << std::endl << std::endl;
stream << std::endl << "Performance counts:" << std::endl << std::endl;
}
std::ios::fmtflags fmt(std::cout.flags());
stream << std::fixed << std::setprecision(precision);
for (const auto& it : performanceData) {
std::string toPrint(it.node_name);
const int maxLayerName = 30;
if (it.node_name.length() >= maxLayerName) {
toPrint = it.node_name.substr(0, maxLayerName - 5);
toPrint += "...";
}
stream << std::setw(maxLayerName) << std::left << toPrint << " ";
switch (it.status) {
case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(15) << std::left << "EXECUTED ";
break;
case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(15) << std::left << "NOT_RUN ";
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
stream << std::setw(25) << std::left << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
stream << std::setw(25) << std::left << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
stream << std::endl;
if (it.real_time.count() > 0) {
totalTime += it.real_time;
}
if (it.cpu_time.count() > 0) {
totalTimeCpu += it.cpu_time;
}
stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
<< " milliseconds" << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << deviceName << std::endl;
std::cout << std::endl;
std::string toPrint(it.node_name);
const int maxPrintLength = 20;
if (it.node_name.length() >= maxPrintLength) {
toPrint = it.node_name.substr(0, maxPrintLength - 5);
toPrint += "...";
}
stream << std::setw(maxPrintLength) << std::left << toPrint << " ";
switch (it.status) {
case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(21) << std::left << "EXECUTED ";
break;
case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(21) << std::left << "NOT_RUN ";
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(21) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << "layerType: ";
if (it.node_type.length() >= maxPrintLength) {
stream << std::setw(maxPrintLength) << std::left << it.node_type.substr(0, maxPrintLength - 3) + "..."
<< " ";
} else {
stream << std::setw(maxPrintLength) << std::left << it.node_type << " ";
}
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) << " ";
stream << "realTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
<< it.real_time.count() / 1000.0 << " ";
stream << "cpuTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
<< it.cpu_time.count() / 1000.0 << " ";
stream << std::endl;
}
stream << std::setw(25) << std::left << "Total time: " << std::fixed << std::setprecision(3)
<< totalTime.count() / 1000.0 << " milliseconds" << std::endl;
stream << std::setw(25) << std::left << "Total CPU time: " << std::fixed << std::setprecision(3)
<< totalTimeCpu.count() / 1000.0 << " milliseconds" << std::endl;
stream << std::endl;
stream << "Full device name: " << deviceName << std::endl;
stream << std::endl;
std::cout.flags(fmt);
}
static UNUSED void printPerformanceCounts(ov::InferRequest request,
std::ostream& stream,
std::string deviceName,
bool bshowHeader = true) {
bool bshowHeader = true,
int precision = 3) {
auto performanceMap = request.get_profiling_info();
printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader);
printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader, precision);
}
static inline std::string double_to_string(const double number) {
@@ -1124,215 +1145,95 @@ static inline void fill_tensor_random(ov::Tensor tensor) {
}
}
static UNUSED void printPerformanceCountsNoSort(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream,
std::string deviceName,
bool bshowHeader = true) {
std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
// Print performance counts
if (bshowHeader) {
stream << std::endl << "performance counts:" << std::endl << std::endl;
}
std::ios::fmtflags fmt(std::cout.flags());
for (const auto& it : performanceData) {
if (it.real_time.count() > 0) {
totalTime += it.real_time;
}
}
if (totalTime.count() != 0) {
for (const auto& it : performanceData) {
std::string toPrint(it.node_name);
const int maxLayerName = 30;
if (it.node_name.length() >= maxLayerName) {
toPrint = it.node_name.substr(0, maxLayerName - 5);
toPrint += "...";
}
stream << std::setw(maxLayerName) << std::left << toPrint << " ";
switch (it.status) {
case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(15) << std::left << "EXECUTED ";
break;
case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(15) << std::left << "NOT_RUN ";
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
stream << std::setw(25) << std::left
<< "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
stream << std::setw(25) << std::left
<< "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
std::stringstream opt_proportion_ss;
opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion;
std::string opt_proportion_str = opt_proportion_ss.str();
if (opt_proportion_str == "0.00") {
opt_proportion_str = "N/A";
}
stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
stream << std::endl;
}
}
stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
<< " milliseconds" << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << deviceName << std::endl;
std::cout << std::endl;
std::cout.flags(fmt);
}
static UNUSED bool sort_pc_descend(const ov::ProfilingInfo& profiling1, const ov::ProfilingInfo& profiling2) {
return profiling1.real_time > profiling2.real_time;
}
static UNUSED void printPerformanceCountsDescendSort(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream,
std::string deviceName,
bool bshowHeader = true) {
std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
// Print performance counts
if (bshowHeader) {
stream << std::endl << "performance counts:" << std::endl << std::endl;
}
std::ios::fmtflags fmt(std::cout.flags());
for (const auto& it : performanceData) {
if (it.real_time.count() > 0) {
totalTime += it.real_time;
}
}
if (totalTime.count() != 0) {
// sort perfcounter
std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)};
std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend);
for (const auto& it : sortPerfCounts) {
std::string toPrint(it.node_name);
const int maxLayerName = 30;
if (it.node_name.length() >= maxLayerName) {
toPrint = it.node_name.substr(0, maxLayerName - 5);
toPrint += "...";
}
stream << std::setw(maxLayerName) << std::left << toPrint << " ";
switch (it.status) {
case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(15) << std::left << "EXECUTED ";
break;
case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(15) << std::left << "NOT_RUN ";
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
stream << std::setw(25) << std::left
<< "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
stream << std::setw(25) << std::left
<< "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
std::stringstream opt_proportion_ss;
opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion;
std::string opt_proportion_str = opt_proportion_ss.str();
if (opt_proportion_str == "0.00") {
opt_proportion_str = "N/A";
}
stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
stream << std::endl;
}
}
stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
<< " milliseconds" << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << deviceName << std::endl;
std::cout << std::endl;
std::cout.flags(fmt);
}
static UNUSED void printPerformanceCountsSimpleSort(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream,
std::string deviceName,
bool bshowHeader = true) {
std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
// Print performance counts
if (bshowHeader) {
stream << std::endl << "performance counts:" << std::endl << std::endl;
}
std::ios::fmtflags fmt(std::cout.flags());
for (const auto& it : performanceData) {
if (it.real_time.count() > 0) {
totalTime += it.real_time;
}
}
if (totalTime.count() != 0) {
// sort perfcounter
std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)};
std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend);
for (const auto& it : sortPerfCounts) {
if (it.status == ov::ProfilingInfo::Status::EXECUTED) {
std::string toPrint(it.node_name);
const int maxLayerName = 30;
if (it.node_name.length() >= maxLayerName) {
toPrint = it.node_name.substr(0, maxLayerName - 5);
toPrint += "...";
}
stream << std::setw(maxLayerName) << std::left << toPrint << " ";
stream << std::setw(15) << std::left << "EXECUTED ";
stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
stream << std::setw(25) << std::left
<< "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
stream << std::setw(25) << std::left
<< "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
std::stringstream opt_proportion_ss;
opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion;
std::string opt_proportion_str = opt_proportion_ss.str();
if (opt_proportion_str == "0.00") {
opt_proportion_str = "N/A";
}
stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
stream << std::endl;
}
}
}
stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
<< " milliseconds" << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << deviceName << std::endl;
std::cout << std::endl;
std::cout.flags(fmt);
}
static UNUSED void printPerformanceCountsSort(std::vector<ov::ProfilingInfo> performanceData,
std::ostream& stream,
std::string deviceName,
std::string sorttype,
bool bshowHeader = true) {
if (sorttype == pcNoSort) {
printPerformanceCountsNoSort(performanceData, stream, deviceName, bshowHeader);
} else if (sorttype == pcSort) {
printPerformanceCountsDescendSort(performanceData, stream, deviceName, bshowHeader);
} else if (sorttype == pcSimpleSort) {
printPerformanceCountsSimpleSort(performanceData, stream, deviceName, bshowHeader);
bool bshowHeader = true,
int precision = 3) {
std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
std::chrono::microseconds totalTimeCpu = std::chrono::microseconds::zero();
// Print performance counts
if (bshowHeader) {
stream << std::endl << "Performance counts:" << std::endl << std::endl;
}
std::ios::fmtflags fmt(std::cout.flags());
stream << std::fixed << std::setprecision(precision);
for (const auto& it : performanceData) {
if (it.real_time.count() > 0) {
totalTime += it.real_time;
}
if (it.cpu_time.count() > 0) {
totalTimeCpu += it.cpu_time;
}
}
if (totalTime.count() != 0) {
std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)};
if (sorttype == pcSort || sorttype == pcSimpleSort) {
std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend);
}
for (const auto& it : sortPerfCounts) {
if ((sorttype == pcSimpleSort && it.status == ov::ProfilingInfo::Status::EXECUTED) ||
sorttype != pcSimpleSort) {
std::string toPrint(it.node_name);
const int maxPrintLength = 20;
if (it.node_name.length() >= maxPrintLength) {
toPrint = it.node_name.substr(0, maxPrintLength - 5);
toPrint += "...";
}
stream << std::setw(maxPrintLength) << std::left << toPrint << " ";
switch (it.status) {
case ov::ProfilingInfo::Status::EXECUTED:
stream << std::setw(21) << std::left << "EXECUTED ";
break;
case ov::ProfilingInfo::Status::NOT_RUN:
stream << std::setw(21) << std::left << "NOT_RUN ";
break;
case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
stream << std::setw(21) << std::left << "OPTIMIZED_OUT ";
break;
}
stream << "layerType: ";
if (it.node_type.length() >= maxPrintLength) {
stream << std::setw(maxPrintLength) << std::left
<< it.node_type.substr(0, maxPrintLength - 3) + "..."
<< " ";
} else {
stream << std::setw(maxPrintLength) << std::left << it.node_type << " ";
}
stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) << " ";
stream << "realTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
<< it.real_time.count() / 1000.0 << " ";
stream << "cpuTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
<< it.cpu_time.count() / 1000.0 << " ";
double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
std::stringstream opt_proportion_ss;
opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion;
std::string opt_proportion_str = opt_proportion_ss.str();
if (opt_proportion_str == "0.00") {
opt_proportion_str = "N/A";
}
stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
stream << std::endl;
}
}
}
stream << std::setw(25) << std::left << "Total time: " << totalTime.count() / 1000.0 << " milliseconds"
<< std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << deviceName << std::endl;
std::cout << std::endl;
std::cout.flags(fmt);
}
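
The merged printer above applies one std::fixed / std::setprecision setting for the whole table, saves the stream's format flags first, and restores them when done. A minimal, self-contained sketch of that pattern follows; the print_row helper and the values are hypothetical, not the real ov::ProfilingInfo data.

// Illustrative sketch, not part of this commit.
#include <iomanip>
#include <iostream>

static void print_row(std::ostream& stream, double real_ms, double cpu_ms, int precision = 3) {
    const std::ios::fmtflags fmt(stream.flags());          // remember the caller's formatting
    stream << std::fixed << std::setprecision(precision);
    stream << "realTime (ms): " << std::setw(10) << std::left << real_ms << ' '
           << "cpuTime (ms): " << std::setw(10) << std::left << cpu_ms << std::endl;
    stream.flags(fmt);                                      // restore it when done
}

int main() {
    print_row(std::cout, 12345 / 1000.0, 6789 / 1000.0);    // realTime (ms): 12.345  cpuTime (ms): 6.789
    return 0;
}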


@@ -6,6 +6,7 @@
#include <ctime>
#include <fstream>
#include <iosfwd>
#include <iostream>
#include <sstream>
#include <string>
@@ -35,8 +36,9 @@ public:
* @brief A constructor. Disables dumping in case dump file cannot be created
* @param enabled - True if dumping is enabled by default.
* @param name - name of file to dump to. File won't be created if first parameter is false.
* @param precision - floating point numbers' decimal places to print.
*/
explicit CsvDumper(bool enabled = true, const std::string& name = "") : canDump(enabled) {
explicit CsvDumper(bool enabled = true, const std::string& name = "", const int precision = 3) : canDump(enabled) {
if (!canDump) {
return;
}
@@ -45,6 +47,8 @@ public:
if (!file) {
slog::warn << "Cannot create dump file! Disabling dump." << slog::endl;
canDump = false;
} else {
setPrecision(precision);
}
}
@@ -57,6 +61,18 @@ public:
delimiter = c;
}
/**
* @brief Sets a precision used to print floating point values
* @param precision - Decimal places to print
* @return
*/
void setPrecision(int precision) {
if (canDump) {
file.precision(precision);
file.setf(std::ios::fixed);
}
}
/**
* @brief Overloads operator to organize streaming values to file. Does nothing if dumping is
* disabled Adds delimiter at the end of value provided
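
For context, a hedged usage example of the extended constructor: the include path and the exact output file name are assumptions based on the samples layout, and the snippet needs the OpenVINO samples utilities on the include path to compile.

// Hypothetical usage, not part of this commit.
#include "samples/csv_dumper.hpp"

int main() {
    CsvDumper dumper(true, "benchmark_report.csv", 3);   // dumping enabled, 3 decimal places for doubles
    dumper << "realTime (ms)" << "cpuTime (ms)";
    dumper.endLine();
    dumper << 1234567 / 1000.0 << 7654321 / 1000.0;      // written as 1234.567 and 7654.321
    dumper.endLine();
    return 0;
}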


@@ -65,14 +65,14 @@ class StatisticsReport:
f.write(self.csv_separator.join(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)\n']))
for pi in prof_info:
f.write(self.csv_separator.join([pi.node_name, str(pi.status), pi.node_type, pi.exec_type,
str((pi.real_time // timedelta(microseconds=1))/1000.0),
str((pi.cpu_time // timedelta(microseconds=1))/1000.0)]))
f"{pi.real_time / timedelta(milliseconds=1):.3f}",
f"{pi.cpu_time / timedelta(milliseconds=1):.3f}"]))
f.write('\n')
total += pi.real_time
total_cpu += pi.cpu_time
f.write(self.csv_separator.join(['Total', '', '', '',
str((total // timedelta(microseconds=1))/1000.0),
str((total_cpu // timedelta(microseconds=1))/1000.0)]))
f"{total / timedelta(milliseconds=1):.3f}",
f"{total_cpu / timedelta(milliseconds=1):.3f}"]))
f.write('\n\n')
def dump_performance_counters_sorted(self, prof_sorted_info):
@@ -83,16 +83,18 @@ class StatisticsReport:
total_cpu = 0
with open(filename, 'w') as f:
writer = csv.writer(f)
writer.writerow(['LayerName', 'ExecStatus', 'LayerType', 'ExecType', 'RealTime (ms)', 'CPUTime (ms)' , 'Proportion(%) \n'])
writer.writerow(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)' , 'proportion (%)\n'])
for tmp_prof in prof_sorted_info:
writer.writerow([tmp_prof[0], str(tmp_prof[1]),
tmp_prof[2], tmp_prof[6],
str(tmp_prof[3]), str(tmp_prof[4]),
f"{tmp_prof[3] / 1000:.3f}", # Divide by 1000
f"{tmp_prof[4] / 1000:.3f}",
str("%.2f"%(tmp_prof[5]*100))+"%"])
total += tmp_prof[3]
total_cpu += tmp_prof[4]
f.write('\n')
writer.writerow(["Total time cost: RealTime %.2fms , CPUTime %.2fms"%(total,total_cpu)])
writer.writerow(["Total time: %.2f milliseconds"%(total / 1000)])
writer.writerow(["Total CPU time: %.2f milliseconds"%(total_cpu / 1000)])
f.write('\n\n')
logger.info(f'Sorted performance counters report is stored to {filename}')
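
The Python report switches from truncating integer division to dividing a timedelta by timedelta(milliseconds=1), so the value keeps its fraction and the f-string controls the rounding. A standalone sketch with a made-up value shows what that changes:

# Illustrative sketch, not part of this commit.
from datetime import timedelta

real_time = timedelta(microseconds=1234500)

old_style = str((real_time // timedelta(microseconds=1)) / 1000.0)  # '1234.5'   - width varies
new_style = f"{real_time / timedelta(milliseconds=1):.3f}"          # '1234.500' - always 3 decimals

print(old_style, new_style)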


@@ -74,11 +74,9 @@ def get_element_type(precision):
def fuse_mean_scale(preproc: PrePostProcessor, app_inputs_info):
# TODO: remove warning after 23.3 release
warned = False;
warned = False
warn_msg = 'Mean/scale values are fused into the model. This slows down performance compared to --imean and --iscale which existed before'
for input_info in app_inputs_info:
print(input_info)
print(input_info.mean)
if input_info.mean.size:
if not warned:
logger.warning(warn_msg)
@@ -399,15 +397,15 @@ def print_perf_counters_sort(perf_counts_list,sort_flag="sort"):
total_detail_data = sorted(total_detail_data,key=lambda tmp_data:tmp_data[-4],reverse=True)
total_detail_data = [tmp_data for tmp_data in total_detail_data if str(tmp_data[1])!="Status.NOT_RUN"]
print_detail_result(total_detail_data)
print(f'Total time: {total_time} microseconds')
print(f'Total CPU time: {total_time_cpu} microseconds')
print(f'Total time: {total_time / 1000:.3f} milliseconds')
print(f'Total CPU time: {total_time_cpu / 1000:.3f} milliseconds')
print(f'Total proportion: {"%.2f"%(round(total_real_time_proportion)*100)} % \n')
return total_detail_data
def print_detail_result(result_list):
""" Print_perf_counters_sort result
"""
max_layer_name = 30
max_print_length = 20
for tmp_result in result_list:
node_name = tmp_result[0]
layerStatus = tmp_result[1]
@@ -418,32 +416,33 @@ def print_detail_result(result_list):
if real_proportion == "0.00":
real_proportion = "N/A"
execType = tmp_result[6]
print(f"{node_name[:max_layer_name - 4] + '...' if (len(node_name) >= max_layer_name) else node_name:<30}"
print(f"{node_name[:max_print_length - 4] + '...' if (len(node_name) >= max_print_length) else node_name:<20} "
f"{str(layerStatus):<20} "
f"{'layerType: ' + layerType:<30}"
f"{'realTime: ' + str(real_time):<20}"
f"{'cpu: ' + str(cpu_time):<15}"
f"{'proportion: '+ str(real_proportion)+'%':<20}"
f"{'execType: ' + execType:<20}")
f"layerType: {layerType[:max_print_length - 4] + '...' if (len(layerType) >= max_print_length) else layerType:<20} "
f"execType: {execType:<20} "
f"realTime (ms): {real_time / timedelta(milliseconds=1):<10.3f} "
f"cpuTime (ms): {cpu_time / timedelta(milliseconds=1):<10.3f}"
f"proportion: {str(real_proportion +'%'):<8}")
def print_perf_counters(perf_counts_list):
max_layer_name = 30
max_print_length = 20
for ni in range(len(perf_counts_list)):
perf_counts = perf_counts_list[ni]
total_time = timedelta()
total_time_cpu = timedelta()
logger.info(f"Performance counts for {ni}-th infer request")
for pi in perf_counts:
print(f"{pi.node_name[:max_layer_name - 4] + '...' if (len(pi.node_name) >= max_layer_name) else pi.node_name:<30}"
f"{str(pi.status):<15}"
f"{'layerType: ' + pi.node_type:<30}"
f"{'realTime: ' + str((pi.real_time // timedelta(microseconds=1)) / 1000.0):<20}"
f"{'cpu: ' + str((pi.cpu_time // timedelta(microseconds=1)) / 1000.0):<20}"
f"{'execType: ' + pi.exec_type:<20}")
print(f"{pi.node_name[:max_print_length - 4] + '...' if (len(pi.node_name) >= max_print_length) else pi.node_name:<20} "
f"{str(pi.status):<20} "
f"layerType: {pi.node_type[:max_print_length - 4] + '...' if (len(pi.node_type) >= max_print_length) else pi.node_type:<20} "
f"execType: {pi.exec_type:<20} "
f"realTime (ms): {pi.real_time / timedelta(milliseconds=1):<10.3f} "
f"cpuTime (ms): {pi.cpu_time / timedelta(milliseconds=1):<10.3f}")
total_time += pi.real_time
total_time_cpu += pi.cpu_time
print(f'Total time: {(total_time // timedelta(microseconds=1)) / 1000.0} seconds')
print(f'Total CPU time: {(total_time_cpu // timedelta(microseconds=1)) / 1000.0} seconds\n')
print(f'Total time: {total_time / timedelta(milliseconds=1)} milliseconds')
print(f'Total CPU time: {total_time_cpu / timedelta(milliseconds=1)} milliseconds\n')
def get_command_line_arguments(argv):
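
For reference, here is a minimal, self-contained sketch of the aligned per-layer line format that print_perf_counters and print_detail_result now share: truncated names, fixed column widths, and millisecond values with 3 decimal places. The helper name and the data are hypothetical.

# Illustrative sketch, not part of this commit.
from datetime import timedelta

def print_layer(node_name, status, node_type, exec_type, real_time, cpu_time, width=20):
    name = node_name[:width - 4] + '...' if len(node_name) >= width else node_name
    layer_type = node_type[:width - 4] + '...' if len(node_type) >= width else node_type
    print(f"{name:<{width}} "
          f"{status:<{width}} "
          f"layerType: {layer_type:<{width}} "
          f"execType: {exec_type:<{width}} "
          f"realTime (ms): {real_time / timedelta(milliseconds=1):<10.3f} "
          f"cpuTime (ms): {cpu_time / timedelta(milliseconds=1):<10.3f}")

print_layer("relu_1", "Status.EXECUTED", "ReLU", "jit_avx2_FP32",
            timedelta(microseconds=1234), timedelta(microseconds=987))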