From f4ff1cbdf3a10448f79f543f483edbd6a135e418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Krzemi=C5=84ski?= Date: Wed, 11 Jan 2023 09:38:37 +0100 Subject: [PATCH] [BENCHMARK_APP] Align pc reports, revert format print to previous, excel friendly (#14644) * [Python] Align pc output in Python benchmark, update sorted output to new headers, print floats with 6-digit precision, fix date format printing to be Excel-friendly * [PYTHON/C++] Change precision of reports and pc counters report, simplify code * [C++] Align C++ benchmark_app report to Python's by setting the precision value to 3 * [C++] Fix CsvDumper precision * [C++/PYTHON] Align pc and pcsort outputs, merge C++ printSort methods into one * Update samples/cpp/common/utils/include/samples/common.hpp Co-authored-by: Zlobin Vladimir * Update samples/cpp/common/utils/include/samples/common.hpp Co-authored-by: Zlobin Vladimir Co-authored-by: Michal Lukaszewski Co-authored-by: Zlobin Vladimir --- .../cpp/benchmark_app/statistics_report.cpp | 9 +- .../common/utils/include/samples/common.hpp | 307 ++++++------------ .../utils/include/samples/csv_dumper.hpp | 18 +- .../benchmark/utils/statistics_report.py | 22 +- .../openvino/tools/benchmark/utils/utils.py | 45 ++- 5 files changed, 159 insertions(+), 242 deletions(-) diff --git a/samples/cpp/benchmark_app/statistics_report.cpp b/samples/cpp/benchmark_app/statistics_report.cpp index 590008e6ab1..369348ad838 100644 --- a/samples/cpp/benchmark_app/statistics_report.cpp +++ b/samples/cpp/benchmark_app/statistics_report.cpp @@ -22,7 +22,7 @@ void StatisticsReport::add_parameters(const Category& category, const Parameters } void StatisticsReport::dump() { - CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_report.csv"); + CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_report.csv", 3); auto dump_parameters = [&dumper](const Parameters& parameters) { for (auto& parameter : parameters) { @@ -88,7 +88,7 @@ void StatisticsReport::dump_performance_counters_request(CsvDumper& dumper, cons ? status_names[(int)layer.status] : "INVALID_STATUS"); dumper << layer.node_type << layer.exec_type; - dumper << std::to_string(layer.real_time.count() / 1000.0) << std::to_string(layer.cpu_time.count() / 1000.0); + dumper << layer.real_time.count() / 1000.0 << layer.cpu_time.count() / 1000.0; total += layer.real_time; total_cpu += layer.cpu_time; dumper.endLine(); @@ -135,8 +135,7 @@ void StatisticsReport::dump_sort_performance_counters_request(CsvDumper& dumper, ?
status_names[(int)layer.status] : "INVALID_STATUS"); dumper << layer.node_type << layer.exec_type; - dumper << std::to_string(layer.real_time.count() / 1000.0) - << std::to_string(layer.cpu_time.count() / 1000.0); + dumper << layer.real_time.count() / 1000.0 << layer.cpu_time.count() / 1000.0; dumper << (layer.real_time * 1.0 / total) * 100; dumper.endLine(); layersize += 1; @@ -191,7 +190,7 @@ void StatisticsReport::dump_performance_counters(const std::vector performanceData, std::ostream& stream, std::string deviceName, - bool bshowHeader = true) { + bool bshowHeader = true, + int precision = 3) { std::chrono::microseconds totalTime = std::chrono::microseconds::zero(); + std::chrono::microseconds totalTimeCpu = std::chrono::microseconds::zero(); // Print performance counts if (bshowHeader) { - stream << std::endl << "performance counts:" << std::endl << std::endl; + stream << std::endl << "Performance counts:" << std::endl << std::endl; } std::ios::fmtflags fmt(std::cout.flags()); + stream << std::fixed << std::setprecision(precision); + for (const auto& it : performanceData) { - std::string toPrint(it.node_name); - const int maxLayerName = 30; - - if (it.node_name.length() >= maxLayerName) { - toPrint = it.node_name.substr(0, maxLayerName - 5); - toPrint += "..."; - } - - stream << std::setw(maxLayerName) << std::left << toPrint << " "; - switch (it.status) { - case ov::ProfilingInfo::Status::EXECUTED: - stream << std::setw(15) << std::left << "EXECUTED "; - break; - case ov::ProfilingInfo::Status::NOT_RUN: - stream << std::setw(15) << std::left << "NOT_RUN "; - break; - case ov::ProfilingInfo::Status::OPTIMIZED_OUT: - stream << std::setw(15) << std::left << "OPTIMIZED_OUT "; - break; - } - stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " "; - stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " "; - stream << std::setw(25) << std::left << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " "; - stream << std::setw(25) << std::left << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " "; - stream << std::endl; if (it.real_time.count() > 0) { totalTime += it.real_time; } + if (it.cpu_time.count() > 0) { + totalTimeCpu += it.cpu_time; + } + + std::string toPrint(it.node_name); + const int maxPrintLength = 20; + + if (it.node_name.length() >= maxPrintLength) { + toPrint = it.node_name.substr(0, maxPrintLength - 5); + toPrint += "..."; + } + + stream << std::setw(maxPrintLength) << std::left << toPrint << " "; + switch (it.status) { + case ov::ProfilingInfo::Status::EXECUTED: + stream << std::setw(21) << std::left << "EXECUTED "; + break; + case ov::ProfilingInfo::Status::NOT_RUN: + stream << std::setw(21) << std::left << "NOT_RUN "; + break; + case ov::ProfilingInfo::Status::OPTIMIZED_OUT: + stream << std::setw(21) << std::left << "OPTIMIZED_OUT "; + break; + } + + stream << "layerType: "; + if (it.node_type.length() >= maxPrintLength) { + stream << std::setw(maxPrintLength) << std::left << it.node_type.substr(0, maxPrintLength - 3) + "..." 
+ << " "; + } else { + stream << std::setw(maxPrintLength) << std::left << it.node_type << " "; + } + + stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) << " "; + stream << "realTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3) + << it.real_time.count() / 1000.0 << " "; + stream << "cpuTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3) + << it.cpu_time.count() / 1000.0 << " "; + stream << std::endl; } - stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0) - << " milliseconds" << std::endl; - std::cout << std::endl; - std::cout << "Full device name: " << deviceName << std::endl; - std::cout << std::endl; + stream << std::setw(25) << std::left << "Total time: " << std::fixed << std::setprecision(3) + << totalTime.count() / 1000.0 << " milliseconds" << std::endl; + stream << std::setw(25) << std::left << "Total CPU time: " << std::fixed << std::setprecision(3) + << totalTimeCpu.count() / 1000.0 << " milliseconds" << std::endl; + stream << std::endl; + stream << "Full device name: " << deviceName << std::endl; + stream << std::endl; std::cout.flags(fmt); } static UNUSED void printPerformanceCounts(ov::InferRequest request, std::ostream& stream, std::string deviceName, - bool bshowHeader = true) { + bool bshowHeader = true, + int precision = 3) { auto performanceMap = request.get_profiling_info(); - printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader); + printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader, precision); } static inline std::string double_to_string(const double number) { @@ -1124,183 +1145,78 @@ static inline void fill_tensor_random(ov::Tensor tensor) { } } -static UNUSED void printPerformanceCountsNoSort(std::vector performanceData, - std::ostream& stream, - std::string deviceName, - bool bshowHeader = true) { - std::chrono::microseconds totalTime = std::chrono::microseconds::zero(); - // Print performance counts - if (bshowHeader) { - stream << std::endl << "performance counts:" << std::endl << std::endl; - } - std::ios::fmtflags fmt(std::cout.flags()); - - for (const auto& it : performanceData) { - if (it.real_time.count() > 0) { - totalTime += it.real_time; - } - } - if (totalTime.count() != 0) { - for (const auto& it : performanceData) { - std::string toPrint(it.node_name); - const int maxLayerName = 30; - - if (it.node_name.length() >= maxLayerName) { - toPrint = it.node_name.substr(0, maxLayerName - 5); - toPrint += "..."; - } - - stream << std::setw(maxLayerName) << std::left << toPrint << " "; - switch (it.status) { - case ov::ProfilingInfo::Status::EXECUTED: - stream << std::setw(15) << std::left << "EXECUTED "; - break; - case ov::ProfilingInfo::Status::NOT_RUN: - stream << std::setw(15) << std::left << "NOT_RUN "; - break; - case ov::ProfilingInfo::Status::OPTIMIZED_OUT: - stream << std::setw(15) << std::left << "OPTIMIZED_OUT "; - break; - } - stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " "; - stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " "; - stream << std::setw(25) << std::left - << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " "; - stream << std::setw(25) << std::left - << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " "; - - double opt_proportion = it.real_time.count() * 100.0 / totalTime.count(); - std::stringstream opt_proportion_ss; - opt_proportion_ss << 
std::fixed << std::setprecision(2) << opt_proportion; - std::string opt_proportion_str = opt_proportion_ss.str(); - if (opt_proportion_str == "0.00") { - opt_proportion_str = "N/A"; - } - stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%"; - - stream << std::endl; - } - } - stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0) - << " milliseconds" << std::endl; - std::cout << std::endl; - std::cout << "Full device name: " << deviceName << std::endl; - std::cout << std::endl; - std::cout.flags(fmt); -} - static UNUSED bool sort_pc_descend(const ov::ProfilingInfo& profiling1, const ov::ProfilingInfo& profiling2) { return profiling1.real_time > profiling2.real_time; } -static UNUSED void printPerformanceCountsDescendSort(std::vector performanceData, - std::ostream& stream, - std::string deviceName, - bool bshowHeader = true) { +static UNUSED void printPerformanceCountsSort(std::vector performanceData, + std::ostream& stream, + std::string deviceName, + std::string sorttype, + bool bshowHeader = true, + int precision = 3) { std::chrono::microseconds totalTime = std::chrono::microseconds::zero(); + std::chrono::microseconds totalTimeCpu = std::chrono::microseconds::zero(); + // Print performance counts if (bshowHeader) { - stream << std::endl << "performance counts:" << std::endl << std::endl; + stream << std::endl << "Performance counts:" << std::endl << std::endl; } std::ios::fmtflags fmt(std::cout.flags()); + stream << std::fixed << std::setprecision(precision); for (const auto& it : performanceData) { if (it.real_time.count() > 0) { totalTime += it.real_time; } - } - if (totalTime.count() != 0) { - // sort perfcounter - std::vector sortPerfCounts{std::begin(performanceData), std::end(performanceData)}; - std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend); - - for (const auto& it : sortPerfCounts) { - std::string toPrint(it.node_name); - const int maxLayerName = 30; - - if (it.node_name.length() >= maxLayerName) { - toPrint = it.node_name.substr(0, maxLayerName - 5); - toPrint += "..."; - } - - stream << std::setw(maxLayerName) << std::left << toPrint << " "; - switch (it.status) { - case ov::ProfilingInfo::Status::EXECUTED: - stream << std::setw(15) << std::left << "EXECUTED "; - break; - case ov::ProfilingInfo::Status::NOT_RUN: - stream << std::setw(15) << std::left << "NOT_RUN "; - break; - case ov::ProfilingInfo::Status::OPTIMIZED_OUT: - stream << std::setw(15) << std::left << "OPTIMIZED_OUT "; - break; - } - stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " "; - stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " "; - stream << std::setw(25) << std::left - << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " "; - stream << std::setw(25) << std::left - << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " "; - - double opt_proportion = it.real_time.count() * 100.0 / totalTime.count(); - std::stringstream opt_proportion_ss; - opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion; - std::string opt_proportion_str = opt_proportion_ss.str(); - if (opt_proportion_str == "0.00") { - opt_proportion_str = "N/A"; - } - stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%"; - - stream << std::endl; - } - } - stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0) - << " milliseconds" << std::endl; - 
std::cout << std::endl; - std::cout << "Full device name: " << deviceName << std::endl; - std::cout << std::endl; - std::cout.flags(fmt); -} - -static UNUSED void printPerformanceCountsSimpleSort(std::vector performanceData, - std::ostream& stream, - std::string deviceName, - bool bshowHeader = true) { - std::chrono::microseconds totalTime = std::chrono::microseconds::zero(); - // Print performance counts - if (bshowHeader) { - stream << std::endl << "performance counts:" << std::endl << std::endl; - } - std::ios::fmtflags fmt(std::cout.flags()); - - for (const auto& it : performanceData) { - if (it.real_time.count() > 0) { - totalTime += it.real_time; + if (it.cpu_time.count() > 0) { + totalTimeCpu += it.cpu_time; } } if (totalTime.count() != 0) { - // sort perfcounter std::vector sortPerfCounts{std::begin(performanceData), std::end(performanceData)}; - std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend); + if (sorttype == pcSort || sorttype == pcSimpleSort) { + std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend); + } for (const auto& it : sortPerfCounts) { - if (it.status == ov::ProfilingInfo::Status::EXECUTED) { + if ((sorttype == pcSimpleSort && it.status == ov::ProfilingInfo::Status::EXECUTED) || + sorttype != pcSimpleSort) { std::string toPrint(it.node_name); - const int maxLayerName = 30; + const int maxPrintLength = 20; - if (it.node_name.length() >= maxLayerName) { - toPrint = it.node_name.substr(0, maxLayerName - 5); + if (it.node_name.length() >= maxPrintLength) { + toPrint = it.node_name.substr(0, maxPrintLength - 5); toPrint += "..."; } - stream << std::setw(maxLayerName) << std::left << toPrint << " "; - stream << std::setw(15) << std::left << "EXECUTED "; - stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " "; - stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " "; - stream << std::setw(25) << std::left - << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " "; - stream << std::setw(25) << std::left - << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " "; + stream << std::setw(maxPrintLength) << std::left << toPrint << " "; + switch (it.status) { + case ov::ProfilingInfo::Status::EXECUTED: + stream << std::setw(21) << std::left << "EXECUTED "; + break; + case ov::ProfilingInfo::Status::NOT_RUN: + stream << std::setw(21) << std::left << "NOT_RUN "; + break; + case ov::ProfilingInfo::Status::OPTIMIZED_OUT: + stream << std::setw(21) << std::left << "OPTIMIZED_OUT "; + break; + } + + stream << "layerType: "; + if (it.node_type.length() >= maxPrintLength) { + stream << std::setw(maxPrintLength) << std::left + << it.node_type.substr(0, maxPrintLength - 3) + "..." 
+ << " "; + } else { + stream << std::setw(maxPrintLength) << std::left << it.node_type << " "; + } + + stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) << " "; + stream << "realTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3) + << it.real_time.count() / 1000.0 << " "; + stream << "cpuTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3) + << it.cpu_time.count() / 1000.0 << " "; double opt_proportion = it.real_time.count() * 100.0 / totalTime.count(); std::stringstream opt_proportion_ss; @@ -1310,29 +1226,14 @@ static UNUSED void printPerformanceCountsSimpleSort(std::vector performanceData, - std::ostream& stream, - std::string deviceName, - std::string sorttype, - bool bshowHeader = true) { - if (sorttype == pcNoSort) { - printPerformanceCountsNoSort(performanceData, stream, deviceName, bshowHeader); - } else if (sorttype == pcSort) { - printPerformanceCountsDescendSort(performanceData, stream, deviceName, bshowHeader); - } else if (sorttype == pcSimpleSort) { - printPerformanceCountsSimpleSort(performanceData, stream, deviceName, bshowHeader); - } -} diff --git a/samples/cpp/common/utils/include/samples/csv_dumper.hpp b/samples/cpp/common/utils/include/samples/csv_dumper.hpp index 5c80134341a..4b0a62195bb 100644 --- a/samples/cpp/common/utils/include/samples/csv_dumper.hpp +++ b/samples/cpp/common/utils/include/samples/csv_dumper.hpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -35,8 +36,9 @@ public: * @brief A constructor. Disables dumping in case dump file cannot be created * @param enabled - True if dumping is enabled by default. * @param name - name of file to dump to. File won't be created if first parameter is false. + * @param precision - floating point numbers' decimal places to print. */ - explicit CsvDumper(bool enabled = true, const std::string& name = "") : canDump(enabled) { + explicit CsvDumper(bool enabled = true, const std::string& name = "", const int precision = 3) : canDump(enabled) { if (!canDump) { return; } @@ -45,6 +47,8 @@ public: if (!file) { slog::warn << "Cannot create dump file! Disabling dump." << slog::endl; canDump = false; + } else { + setPrecision(precision); } } @@ -57,6 +61,18 @@ public: delimiter = c; } + /** + * @brief Sets a precision used to print floating point values + * @param precision - Decimal places to print + * @return + */ + void setPrecision(int precision) { + if (canDump) { + file.precision(precision); + file.setf(std::ios::fixed); + } + } + /** * @brief Overloads operator to organize streaming values to file. 
Does nothing if dumping is * disabled Adds delimiter at the end of value provided diff --git a/tools/benchmark_tool/openvino/tools/benchmark/utils/statistics_report.py b/tools/benchmark_tool/openvino/tools/benchmark/utils/statistics_report.py index 1909335266d..6368828d740 100644 --- a/tools/benchmark_tool/openvino/tools/benchmark/utils/statistics_report.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/statistics_report.py @@ -65,17 +65,17 @@ class StatisticsReport: f.write(self.csv_separator.join(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)\n'])) for pi in prof_info: f.write(self.csv_separator.join([pi.node_name, str(pi.status), pi.node_type, pi.exec_type, - str((pi.real_time // timedelta(microseconds=1))/1000.0), - str((pi.cpu_time // timedelta(microseconds=1))/1000.0)])) + f"{pi.real_time / timedelta(milliseconds=1):.3f}", + f"{pi.cpu_time / timedelta(milliseconds=1):.3f}"])) f.write('\n') total += pi.real_time total_cpu += pi.cpu_time - f.write(self.csv_separator.join(['Total','','','', - str((total // timedelta(microseconds=1))/1000.0), - str((total_cpu // timedelta(microseconds=1))/1000.0)])) + f.write(self.csv_separator.join(['Total', '', '', '', + f"{total / timedelta(milliseconds=1):.3f}", + f"{total_cpu / timedelta(milliseconds=1):.3f}"])) f.write('\n\n') - def dump_performance_counters_sorted(self,prof_sorted_info): + def dump_performance_counters_sorted(self, prof_sorted_info): """Save sorted performance counters into csv file. """ filename = os.path.join(self.config.report_folder, f'benchmark_sorted_report.csv') @@ -83,16 +83,18 @@ class StatisticsReport: total_cpu = 0 with open(filename, 'w') as f: writer = csv.writer(f) - writer.writerow(['LayerName', 'ExecStatus', 'LayerType', 'ExecType', 'RealTime (ms)', 'CPUTime (ms)' , 'Proportion(%) \n']) + writer.writerow(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)' , 'proportion (%)\n']) for tmp_prof in prof_sorted_info: writer.writerow([tmp_prof[0], str(tmp_prof[1]), tmp_prof[2], tmp_prof[6], - str(tmp_prof[3]), str(tmp_prof[4]), + f"{tmp_prof[3] / 1000:.3f}", # Divide by 1000 + f"{tmp_prof[4] / 1000:.3f}", str("%.2f"%(tmp_prof[5]*100))+"%"]) total += tmp_prof[3] total_cpu += tmp_prof[4] - f.write('\n') - writer.writerow(["Total time cost: RealTime %.2fms , CPUTime %.2fms"%(total,total_cpu)]) + f.write('\n') + writer.writerow(["Total time: %.2f milliseconds"%(total / 1000)]) + writer.writerow(["Total CPU time: %.2f milliseconds"%(total_cpu / 1000)]) f.write('\n\n') logger.info(f'Sorted performance counters report is stored to {filename}') diff --git a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py index ac39b17cbd8..bd49ada55ee 100644 --- a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py @@ -74,11 +74,9 @@ def get_element_type(precision): def fuse_mean_scale(preproc: PrePostProcessor, app_inputs_info): # TODO: remove warning after 23.3 release - warned = False; + warned = False warn_msg = 'Mean/scale values are fused into the model. 
This slows down performance compared to --imean and --iscale which existed before' for input_info in app_inputs_info: - print(input_info) - print(input_info.mean) if input_info.mean.size: if not warned: logger.warning(warn_msg) @@ -399,51 +397,52 @@ def print_perf_counters_sort(perf_counts_list,sort_flag="sort"): total_detail_data = sorted(total_detail_data,key=lambda tmp_data:tmp_data[-4],reverse=True) total_detail_data = [tmp_data for tmp_data in total_detail_data if str(tmp_data[1])!="Status.NOT_RUN"] print_detail_result(total_detail_data) - print(f'Total time: {total_time} microseconds') - print(f'Total CPU time: {total_time_cpu} microseconds') + print(f'Total time: {total_time / 1000:.3f} milliseconds') + print(f'Total CPU time: {total_time_cpu / 1000:.3f} milliseconds') print(f'Total proportion: {"%.2f"%(round(total_real_time_proportion)*100)} % \n') return total_detail_data def print_detail_result(result_list): """ Print_perf_counters_sort result """ - max_layer_name = 30 + max_print_length = 20 for tmp_result in result_list: node_name = tmp_result[0] layerStatus = tmp_result[1] layerType = tmp_result[2] real_time = tmp_result[3] cpu_time = tmp_result[4] - real_proportion = "%.2f"%(tmp_result[5]*100) + real_proportion = "%.2f" % (tmp_result[5] * 100) if real_proportion == "0.00": real_proportion = "N/A" execType = tmp_result[6] - print(f"{node_name[:max_layer_name - 4] + '...' if (len(node_name) >= max_layer_name) else node_name:<30}" - f"{str(layerStatus):<20}" - f"{'layerType: ' + layerType:<30}" - f"{'realTime: ' + str(real_time):<20}" - f"{'cpu: ' + str(cpu_time):<15}" - f"{'proportion: '+ str(real_proportion)+'%':<20}" - f"{'execType: ' + execType:<20}") + print(f"{node_name[:max_print_length - 4] + '...' if (len(node_name) >= max_print_length) else node_name:<20} " + f"{str(layerStatus):<20} " + f"layerType: {layerType[:max_print_length - 4] + '...' if (len(layerType) >= max_print_length) else layerType:<20} " + f"execType: {execType:<20} " + f"realTime (ms): {real_time / timedelta(milliseconds=1):<10.3f} " + f"cpuTime (ms): {cpu_time / timedelta(milliseconds=1):<10.3f}" + f"proportion: {str(real_proportion +'%'):<8}") def print_perf_counters(perf_counts_list): - max_layer_name = 30 + max_print_length = 20 for ni in range(len(perf_counts_list)): perf_counts = perf_counts_list[ni] total_time = timedelta() total_time_cpu = timedelta() logger.info(f"Performance counts for {ni}-th infer request") for pi in perf_counts: - print(f"{pi.node_name[:max_layer_name - 4] + '...' if (len(pi.node_name) >= max_layer_name) else pi.node_name:<30}" - f"{str(pi.status):<15}" - f"{'layerType: ' + pi.node_type:<30}" - f"{'realTime: ' + str((pi.real_time // timedelta(microseconds=1)) / 1000.0):<20}" - f"{'cpu: ' + str((pi.cpu_time // timedelta(microseconds=1)) / 1000.0):<20}" - f"{'execType: ' + pi.exec_type:<20}") + print(f"{pi.node_name[:max_print_length - 4] + '...' if (len(pi.node_name) >= max_print_length) else pi.node_name:<20} " + f"{str(pi.status):<20} " + f"layerType: {pi.node_type[:max_print_length - 4] + '...' 
if (len(pi.node_type) >= max_print_length) else pi.node_type:<20} " + f"execType: {pi.exec_type:<20} " + f"realTime (ms): {pi.real_time / timedelta(milliseconds=1):<10.3f} " + f"cpuTime (ms): {pi.cpu_time / timedelta(milliseconds=1):<10.3f}") + total_time += pi.real_time total_time_cpu += pi.cpu_time - print(f'Total time: {(total_time // timedelta(microseconds=1)) / 1000.0} seconds') - print(f'Total CPU time: {(total_time_cpu // timedelta(microseconds=1)) / 1000.0} seconds\n') + print(f'Total time: {total_time / timedelta(milliseconds=1)} milliseconds') + print(f'Total CPU time: {total_time_cpu / timedelta(milliseconds=1)} milliseconds\n') def get_command_line_arguments(argv):
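Note (not part of the patch): both the C++ and Python reports converge on the same formatting style, fixed-width, left-aligned columns with std::fixed / std::setprecision(3) (or ":.3f" in Python) for millisecond values. A minimal standalone C++ sketch of that style, using only the standard library and made-up layer names and timings, could look like this:

#include <chrono>
#include <iomanip>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
    // Hypothetical per-layer timings, stored in microseconds as the profiling info is.
    std::vector<std::pair<std::string, std::chrono::microseconds>> layers = {
        {"Convolution_123_with_a_very_long_name", std::chrono::microseconds(1234567)},
        {"Relu_7", std::chrono::microseconds(4567)},
    };

    const int maxPrintLength = 20;  // column width used for layer names
    std::chrono::microseconds total = std::chrono::microseconds::zero();

    std::cout << std::fixed << std::setprecision(3);
    for (const auto& layer : layers) {
        // Truncate long names so the columns stay aligned.
        std::string name = layer.first;
        if (name.length() >= maxPrintLength) {
            name = name.substr(0, maxPrintLength - 5) + "...";
        }
        std::cout << std::setw(maxPrintLength) << std::left << name << " "
                  << "realTime (ms): " << std::setw(10) << std::left
                  << layer.second.count() / 1000.0 << std::endl;
        total += layer.second;
    }
    std::cout << "Total time: " << total.count() / 1000.0 << " milliseconds" << std::endl;
    return 0;
}

Printed this way, 1234567 microseconds comes out as 1234.567 rather than the six decimal places std::to_string produces by default, so the console report stays readable and the same numbers, written through CsvDumper with the new precision argument, parse cleanly as numbers when the CSV is opened in a spreadsheet.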