Benchmark sort opt (#12350)

* Add an op profiling feature to the benchmark_app Python and C/C++ scripts

* Fix compile bug about benchmark c/c++ op profiling

* Fix Compile Bug

* Fix issues in PR #12350: 1. update report_type info; 2. priority of -pc and -pcsort; 3. align format between C++ and Python; 4. check 'proportion' for 0 and print N/A; 5. detect the confusing print info

* Fix 8/17 review suggestion

* Fix 8/23 suggestion

* Fix the clang-format issue

* Fix the win-cc issue

* Fix win-cc issue about conversion from 'double' to 'float', possible loss of data

* Push PR by ODT group account

Co-authored-by: Chen Peter <peter.chen@intel.com>
Co-authored-by: Fiona Zhao <fiona.zhao@intel.com>
This commit is contained in:
18582088138
2022-10-19 22:59:12 +08:00
committed by GitHub
parent c95b3e5138
commit 152511daa8
9 changed files with 444 additions and 7 deletions

View File

@@ -93,6 +93,7 @@ void StatisticsReport::dump_performance_counters_request(CsvDumper& dumper, cons
total_cpu += layer.cpu_time;
dumper.endLine();
}
dumper << "Total"
<< ""
<< ""
@@ -102,6 +103,55 @@ void StatisticsReport::dump_performance_counters_request(CsvDumper& dumper, cons
dumper.endLine();
}
/// Dumps the performance counters of one request as CSV rows, ordered by
/// `sort_profiling_descend`.
///
/// Only layers whose status is EXECUTED are written and counted in the totals.
/// The output is: a header row, one row per executed layer (name, status,
/// layer type, exec type, real time in ms, CPU time in ms, share of the total
/// real time in percent), a "Total" row, and an empty line.
///
/// @param dumper     CSV sink that receives the rows.
/// @param perfCounts Performance counters of a single request.
void StatisticsReport::dump_sort_performance_counters_request(CsvDumper& dumper,
                                                              const PerformanceCounters& perfCounts) {
    std::chrono::microseconds total = std::chrono::microseconds::zero();
    std::chrono::microseconds total_cpu = std::chrono::microseconds::zero();

    dumper << "layerName"
           << "execStatus"
           << "layerType"
           << "execType";
    dumper << "realTime (ms)"
           << "cpuTime (ms)"
           << " %";
    dumper.endLine();

    // Compare the status enum directly: this neither depends on how a
    // `status_names` entry compares against a string literal nor indexes the
    // table before the range check below.
    for (const auto& layer : perfCounts) {
        if (layer.status == ov::ProfilingInfo::Status::EXECUTED) {
            total += layer.real_time;
            total_cpu += layer.cpu_time;
        }
    }

    // Sort a copy so the caller's counters keep their original order.
    std::vector<ov::ProfilingInfo> profiling{std::begin(perfCounts), std::end(perfCounts)};
    std::sort(profiling.begin(), profiling.end(), sort_profiling_descend);

    // A zero total would turn every percentage into NaN (0 / 0).
    const bool has_total = total.count() != 0;
    const std::size_t status_count = sizeof(status_names) / sizeof(status_names[0]);

    for (const auto& layer : profiling) {
        if (layer.status != ov::ProfilingInfo::Status::EXECUTED) {
            continue;
        }
        const std::size_t status_index = static_cast<std::size_t>(layer.status);
        dumper << layer.node_name;  // layer name
        dumper << (status_index < status_count ? status_names[status_index] : "INVALID_STATUS");
        dumper << layer.node_type << layer.exec_type;
        dumper << std::to_string(layer.real_time.count() / 1000.0)
               << std::to_string(layer.cpu_time.count() / 1000.0);
        dumper << (has_total ? (layer.real_time * 1.0 / total) * 100 : 0.0);
        dumper.endLine();
    }

    dumper << "Total"
           << ""
           << ""
           << "";
    dumper << total.count() / 1000.0 << total_cpu.count() / 1000.0 << (has_total ? 100.0 : 0.0);
    dumper.endLine();
    dumper.endLine();
}
StatisticsReport::PerformanceCounters StatisticsReport::get_average_performance_counters(
const std::vector<PerformanceCounters>& perfCounts) {
StatisticsReport::PerformanceCounters performanceCountersAvg;
@@ -148,6 +198,10 @@ void StatisticsReport::dump_performance_counters(const std::vector<PerformanceCo
}
} else if (_config.report_type == averageCntReport) {
dump_performance_counters_request(dumper, get_average_performance_counters(perfCounts));
} else if (_config.report_type == sortDetailedCntReport) {
for (auto& pc : perfCounts) {
dump_sort_performance_counters_request(dumper, pc);
}
} else {
throw std::logic_error("PM data can only be collected for average or detailed report types");
}
@@ -206,6 +260,10 @@ void StatisticsReportJSON::dump_performance_counters(const std::vector<Performan
} else if (_config.report_type == averageCntReport) {
js["report_type"] = "average";
js["avg_performance"] = perf_counters_to_json(get_average_performance_counters(perfCounts));
} else if (_config.report_type == sortDetailedCntReport) {
for (auto& pc : perfCounts) {
js["detailed_performance"].push_back(sort_perf_counters_to_json(pc));
}
} else {
throw std::logic_error("PM data can only be collected for average or detailed report types");
}
@@ -242,6 +300,42 @@ const nlohmann::json StatisticsReportJSON::perf_counters_to_json(
return js;
}
/// Converts the performance counters of one request into a JSON object with
/// the nodes ordered by `sort_profiling_descend`.
///
/// The result holds a "nodes" array (name, status, node_type, exec_type,
/// real_time and cpu_time in ms, and "%" = share of the total real time rounded
/// to two decimals) plus "total_real_time" and "total_cpu_time" in ms. The
/// totals are summed over every layer in `perfCounts`, whatever its status.
///
/// @param perfCounts Performance counters of a single request.
/// @return JSON object describing the sorted counters.
const nlohmann::json StatisticsReportJSON::sort_perf_counters_to_json(
    const StatisticsReport::PerformanceCounters& perfCounts) {
    std::chrono::microseconds total = std::chrono::microseconds::zero();
    std::chrono::microseconds total_cpu = std::chrono::microseconds::zero();

    nlohmann::json js;
    js["nodes"] = nlohmann::json::array();

    for (const auto& layer : perfCounts) {
        total += layer.real_time;
        total_cpu += layer.cpu_time;
    }

    // Sort a copy so the caller's counters keep their original order.
    std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(perfCounts), std::end(perfCounts)};
    std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_profiling_descend);

    // With a zero total the ratio is NaN/inf, which is not representable as a
    // JSON number; report 0 instead.
    const bool has_total = total.count() != 0;
    const std::size_t status_count = sizeof(status_names) / sizeof(status_names[0]);

    for (const auto& layer : sortPerfCounts) {
        const std::size_t status_index = static_cast<std::size_t>(layer.status);
        nlohmann::json item;
        item["name"] = layer.node_name;  // layer name
        item["status"] = (status_index < status_count ? status_names[status_index] : "INVALID_STATUS");
        item["node_type"] = layer.node_type;
        item["exec_type"] = layer.exec_type;
        item["real_time"] = layer.real_time.count() / 1000.0;
        item["cpu_time"] = layer.cpu_time.count() / 1000.0;
        item["%"] = has_total ? std::round(layer.real_time * 10000.0 / total) / 100 : 0.0;
        js["nodes"].push_back(item);
    }

    js["total_real_time"] = total.count() / 1000.0;
    js["total_cpu_time"] = total_cpu.count() / 1000.0;
    return js;
}
void LatencyMetrics::write_to_stream(std::ostream& stream) const {
std::ios::fmtflags fmt(std::cout.flags());
stream << data_shape << ";" << std::fixed << std::setprecision(2) << median_or_percentile << ";" << avg << ";"