[BENCHMARK_APP] Align pc reports, revert format print to previous, excel friendly (#14644)
* [Python] Align pc output in the Python benchmark, update sorted output to the new headers, print floats with 6-digit precision, fix date format printing to be Excel-friendly
* [PYTHON/C++] Change precision of reports and the pc counters report, simplify code
* [C++] Align the C++ benchmark_app report to Python's by setting the precision value to 3
* [C++] Fix CsvDumper precision
* [C++/PYTHON] Align pc and pcsort outputs, merge the C++ printSort methods into one
* Update samples/cpp/common/utils/include/samples/common.hpp

Co-authored-by: Zlobin Vladimir <vladimir.zlobin@intel.com>
Co-authored-by: Michal Lukaszewski <michal.lukaszewski@intel.com>
parent c7a1468fb7
commit f4ff1cbdf3
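Both the C++ and the Python changes below converge on the same formatting rule: layer and total times are printed as fixed-point numbers with a configurable number of decimal places (3 by default). A minimal, hypothetical C++ sketch of that idiom, independent of the benchmark sources (print_layer_time and its arguments are illustrative only):

#include <chrono>
#include <iomanip>
#include <iostream>

// Illustrative helper: prints a microsecond duration as milliseconds using the
// same std::fixed / std::setprecision idiom the report code relies on.
static void print_layer_time(std::ostream& stream,
                             std::chrono::microseconds real_time,
                             int precision = 3) {
    const std::ios::fmtflags saved_flags(stream.flags());
    const std::streamsize saved_precision = stream.precision();

    stream << std::fixed << std::setprecision(precision)
           << "realTime (ms): " << real_time.count() / 1000.0 << std::endl;

    stream.flags(saved_flags);          // restore the caller's format flags
    stream.precision(saved_precision);  // and the caller's precision
}

int main() {
    print_layer_time(std::cout, std::chrono::microseconds(1234));  // prints "realTime (ms): 1.234"
    return 0;
}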
@@ -22,7 +22,7 @@ void StatisticsReport::add_parameters(const Category& category, const Parameters
 }
 
 void StatisticsReport::dump() {
-    CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_report.csv");
+    CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_report.csv", 3);
 
     auto dump_parameters = [&dumper](const Parameters& parameters) {
         for (auto& parameter : parameters) {
@@ -88,7 +88,7 @@ void StatisticsReport::dump_performance_counters_request(CsvDumper& dumper, cons
                          ? status_names[(int)layer.status]
                          : "INVALID_STATUS");
         dumper << layer.node_type << layer.exec_type;
-        dumper << std::to_string(layer.real_time.count() / 1000.0) << std::to_string(layer.cpu_time.count() / 1000.0);
+        dumper << layer.real_time.count() / 1000.0 << layer.cpu_time.count() / 1000.0;
         total += layer.real_time;
         total_cpu += layer.cpu_time;
         dumper.endLine();
@@ -135,8 +135,7 @@ void StatisticsReport::dump_sort_performance_counters_request(CsvDumper& dumper,
                          ? status_names[(int)layer.status]
                          : "INVALID_STATUS");
         dumper << layer.node_type << layer.exec_type;
-        dumper << std::to_string(layer.real_time.count() / 1000.0)
-               << std::to_string(layer.cpu_time.count() / 1000.0);
+        dumper << layer.real_time.count() / 1000.0 << layer.cpu_time.count() / 1000.0;
         dumper << (layer.real_time * 1.0 / total) * 100;
         dumper.endLine();
         layersize += 1;
@@ -191,7 +190,7 @@ void StatisticsReport::dump_performance_counters(const std::vector<PerformanceCo
         slog::info << "Performance counters are empty. No reports are dumped." << slog::endl;
         return;
     }
-    CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_" + _config.report_type + "_report.csv");
+    CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_" + _config.report_type + "_report.csv", 3);
     if (_config.report_type == detailedCntReport) {
        for (auto& pc : perfCounts) {
            dump_performance_counters_request(dumper, pc);
@@ -1001,57 +1001,78 @@ inline std::string getFullDeviceName(ov::Core& core, std::string device) {
 static UNUSED void printPerformanceCounts(std::vector<ov::ProfilingInfo> performanceData,
                                           std::ostream& stream,
                                           std::string deviceName,
-                                          bool bshowHeader = true) {
+                                          bool bshowHeader = true,
+                                          int precision = 3) {
     std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
+    std::chrono::microseconds totalTimeCpu = std::chrono::microseconds::zero();
     // Print performance counts
     if (bshowHeader) {
-        stream << std::endl << "performance counts:" << std::endl << std::endl;
+        stream << std::endl << "Performance counts:" << std::endl << std::endl;
     }
     std::ios::fmtflags fmt(std::cout.flags());
+    stream << std::fixed << std::setprecision(precision);
 
     for (const auto& it : performanceData) {
-        std::string toPrint(it.node_name);
-        const int maxLayerName = 30;
-
-        if (it.node_name.length() >= maxLayerName) {
-            toPrint = it.node_name.substr(0, maxLayerName - 5);
-            toPrint += "...";
-        }
-
-        stream << std::setw(maxLayerName) << std::left << toPrint << " ";
-        switch (it.status) {
-        case ov::ProfilingInfo::Status::EXECUTED:
-            stream << std::setw(15) << std::left << "EXECUTED ";
-            break;
-        case ov::ProfilingInfo::Status::NOT_RUN:
-            stream << std::setw(15) << std::left << "NOT_RUN ";
-            break;
-        case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
-            stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
-            break;
-        }
-        stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
-        stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
-        stream << std::setw(25) << std::left << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
-        stream << std::setw(25) << std::left << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
-        stream << std::endl;
         if (it.real_time.count() > 0) {
             totalTime += it.real_time;
         }
+        if (it.cpu_time.count() > 0) {
+            totalTimeCpu += it.cpu_time;
+        }
+
+        std::string toPrint(it.node_name);
+        const int maxPrintLength = 20;
+
+        if (it.node_name.length() >= maxPrintLength) {
+            toPrint = it.node_name.substr(0, maxPrintLength - 5);
+            toPrint += "...";
+        }
+
+        stream << std::setw(maxPrintLength) << std::left << toPrint << " ";
+        switch (it.status) {
+        case ov::ProfilingInfo::Status::EXECUTED:
+            stream << std::setw(21) << std::left << "EXECUTED ";
+            break;
+        case ov::ProfilingInfo::Status::NOT_RUN:
+            stream << std::setw(21) << std::left << "NOT_RUN ";
+            break;
+        case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
+            stream << std::setw(21) << std::left << "OPTIMIZED_OUT ";
+            break;
+        }
+
+        stream << "layerType: ";
+        if (it.node_type.length() >= maxPrintLength) {
+            stream << std::setw(maxPrintLength) << std::left << it.node_type.substr(0, maxPrintLength - 3) + "..."
+                   << " ";
+        } else {
+            stream << std::setw(maxPrintLength) << std::left << it.node_type << " ";
+        }
+
+        stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) << " ";
+        stream << "realTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
+               << it.real_time.count() / 1000.0 << " ";
+        stream << "cpuTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
+               << it.cpu_time.count() / 1000.0 << " ";
+        stream << std::endl;
     }
-    stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
-           << " milliseconds" << std::endl;
-    std::cout << std::endl;
-    std::cout << "Full device name: " << deviceName << std::endl;
-    std::cout << std::endl;
+    stream << std::setw(25) << std::left << "Total time: " << std::fixed << std::setprecision(3)
+           << totalTime.count() / 1000.0 << " milliseconds" << std::endl;
+    stream << std::setw(25) << std::left << "Total CPU time: " << std::fixed << std::setprecision(3)
+           << totalTimeCpu.count() / 1000.0 << " milliseconds" << std::endl;
+    stream << std::endl;
+    stream << "Full device name: " << deviceName << std::endl;
+    stream << std::endl;
     std::cout.flags(fmt);
 }
 
 static UNUSED void printPerformanceCounts(ov::InferRequest request,
                                           std::ostream& stream,
                                           std::string deviceName,
-                                          bool bshowHeader = true) {
+                                          bool bshowHeader = true,
+                                          int precision = 3) {
     auto performanceMap = request.get_profiling_info();
-    printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader);
+    printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader, precision);
 }
 
 static inline std::string double_to_string(const double number) {
@@ -1124,183 +1145,78 @@ static inline void fill_tensor_random(ov::Tensor tensor) {
     }
 }
 
-static UNUSED void printPerformanceCountsNoSort(std::vector<ov::ProfilingInfo> performanceData,
-                                                std::ostream& stream,
-                                                std::string deviceName,
-                                                bool bshowHeader = true) {
-    std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
-    // Print performance counts
-    if (bshowHeader) {
-        stream << std::endl << "performance counts:" << std::endl << std::endl;
-    }
-    std::ios::fmtflags fmt(std::cout.flags());
-
-    for (const auto& it : performanceData) {
-        if (it.real_time.count() > 0) {
-            totalTime += it.real_time;
-        }
-    }
-    if (totalTime.count() != 0) {
-        for (const auto& it : performanceData) {
-            std::string toPrint(it.node_name);
-            const int maxLayerName = 30;
-
-            if (it.node_name.length() >= maxLayerName) {
-                toPrint = it.node_name.substr(0, maxLayerName - 5);
-                toPrint += "...";
-            }
-
-            stream << std::setw(maxLayerName) << std::left << toPrint << " ";
-            switch (it.status) {
-            case ov::ProfilingInfo::Status::EXECUTED:
-                stream << std::setw(15) << std::left << "EXECUTED ";
-                break;
-            case ov::ProfilingInfo::Status::NOT_RUN:
-                stream << std::setw(15) << std::left << "NOT_RUN ";
-                break;
-            case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
-                stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
-                break;
-            }
-            stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
-            stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
-            stream << std::setw(25) << std::left
-                   << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
-            stream << std::setw(25) << std::left
-                   << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
-
-            double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
-            std::stringstream opt_proportion_ss;
-            opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion;
-            std::string opt_proportion_str = opt_proportion_ss.str();
-            if (opt_proportion_str == "0.00") {
-                opt_proportion_str = "N/A";
-            }
-            stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
-
-            stream << std::endl;
-        }
-    }
-    stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
-           << " milliseconds" << std::endl;
-    std::cout << std::endl;
-    std::cout << "Full device name: " << deviceName << std::endl;
-    std::cout << std::endl;
-    std::cout.flags(fmt);
-}
-
 static UNUSED bool sort_pc_descend(const ov::ProfilingInfo& profiling1, const ov::ProfilingInfo& profiling2) {
     return profiling1.real_time > profiling2.real_time;
 }
 
-static UNUSED void printPerformanceCountsDescendSort(std::vector<ov::ProfilingInfo> performanceData,
+static UNUSED void printPerformanceCountsSort(std::vector<ov::ProfilingInfo> performanceData,
                                               std::ostream& stream,
                                               std::string deviceName,
-                                              bool bshowHeader = true) {
+                                              std::string sorttype,
+                                              bool bshowHeader = true,
+                                              int precision = 3) {
     std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
+    std::chrono::microseconds totalTimeCpu = std::chrono::microseconds::zero();
+
     // Print performance counts
     if (bshowHeader) {
-        stream << std::endl << "performance counts:" << std::endl << std::endl;
+        stream << std::endl << "Performance counts:" << std::endl << std::endl;
     }
     std::ios::fmtflags fmt(std::cout.flags());
+    stream << std::fixed << std::setprecision(precision);
 
     for (const auto& it : performanceData) {
         if (it.real_time.count() > 0) {
             totalTime += it.real_time;
         }
-    }
-    if (totalTime.count() != 0) {
-        // sort perfcounter
-        std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)};
-        std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend);
-
-        for (const auto& it : sortPerfCounts) {
-            std::string toPrint(it.node_name);
-            const int maxLayerName = 30;
-
-            if (it.node_name.length() >= maxLayerName) {
-                toPrint = it.node_name.substr(0, maxLayerName - 5);
-                toPrint += "...";
-            }
-
-            stream << std::setw(maxLayerName) << std::left << toPrint << " ";
-            switch (it.status) {
-            case ov::ProfilingInfo::Status::EXECUTED:
-                stream << std::setw(15) << std::left << "EXECUTED ";
-                break;
-            case ov::ProfilingInfo::Status::NOT_RUN:
-                stream << std::setw(15) << std::left << "NOT_RUN ";
-                break;
-            case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
-                stream << std::setw(15) << std::left << "OPTIMIZED_OUT ";
-                break;
-            }
-            stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
-            stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
-            stream << std::setw(25) << std::left
-                   << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
-            stream << std::setw(25) << std::left
-                   << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
-
-            double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
-            std::stringstream opt_proportion_ss;
-            opt_proportion_ss << std::fixed << std::setprecision(2) << opt_proportion;
-            std::string opt_proportion_str = opt_proportion_ss.str();
-            if (opt_proportion_str == "0.00") {
-                opt_proportion_str = "N/A";
-            }
-            stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
-
-            stream << std::endl;
-        }
-    }
-    stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
-           << " milliseconds" << std::endl;
-    std::cout << std::endl;
-    std::cout << "Full device name: " << deviceName << std::endl;
-    std::cout << std::endl;
-    std::cout.flags(fmt);
-}
-
-static UNUSED void printPerformanceCountsSimpleSort(std::vector<ov::ProfilingInfo> performanceData,
-                                                    std::ostream& stream,
-                                                    std::string deviceName,
-                                                    bool bshowHeader = true) {
-    std::chrono::microseconds totalTime = std::chrono::microseconds::zero();
-    // Print performance counts
-    if (bshowHeader) {
-        stream << std::endl << "performance counts:" << std::endl << std::endl;
-    }
-    std::ios::fmtflags fmt(std::cout.flags());
-
-    for (const auto& it : performanceData) {
-        if (it.real_time.count() > 0) {
-            totalTime += it.real_time;
+        if (it.cpu_time.count() > 0) {
+            totalTimeCpu += it.cpu_time;
         }
     }
     if (totalTime.count() != 0) {
-        // sort perfcounter
         std::vector<ov::ProfilingInfo> sortPerfCounts{std::begin(performanceData), std::end(performanceData)};
-        std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend);
+        if (sorttype == pcSort || sorttype == pcSimpleSort) {
+            std::sort(sortPerfCounts.begin(), sortPerfCounts.end(), sort_pc_descend);
+        }
+
         for (const auto& it : sortPerfCounts) {
-            if (it.status == ov::ProfilingInfo::Status::EXECUTED) {
+            if ((sorttype == pcSimpleSort && it.status == ov::ProfilingInfo::Status::EXECUTED) ||
+                sorttype != pcSimpleSort) {
                 std::string toPrint(it.node_name);
-                const int maxLayerName = 30;
+                const int maxPrintLength = 20;
 
-                if (it.node_name.length() >= maxLayerName) {
-                    toPrint = it.node_name.substr(0, maxLayerName - 5);
+                if (it.node_name.length() >= maxPrintLength) {
+                    toPrint = it.node_name.substr(0, maxPrintLength - 5);
                     toPrint += "...";
                 }
 
-                stream << std::setw(maxLayerName) << std::left << toPrint << " ";
-                stream << std::setw(15) << std::left << "EXECUTED ";
-                stream << std::setw(30) << std::left << "layerType: " + std::string(it.node_type) + " ";
-                stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) + " ";
-                stream << std::setw(25) << std::left
-                       << "realTime (ms): " + std::to_string(it.real_time.count() / 1000.0) + " ";
-                stream << std::setw(25) << std::left
-                       << "cpuTime (ms): " + std::to_string(it.cpu_time.count() / 1000.0) + " ";
+                stream << std::setw(maxPrintLength) << std::left << toPrint << " ";
+                switch (it.status) {
+                case ov::ProfilingInfo::Status::EXECUTED:
+                    stream << std::setw(21) << std::left << "EXECUTED ";
+                    break;
+                case ov::ProfilingInfo::Status::NOT_RUN:
+                    stream << std::setw(21) << std::left << "NOT_RUN ";
+                    break;
+                case ov::ProfilingInfo::Status::OPTIMIZED_OUT:
+                    stream << std::setw(21) << std::left << "OPTIMIZED_OUT ";
+                    break;
+                }
+
+                stream << "layerType: ";
+                if (it.node_type.length() >= maxPrintLength) {
+                    stream << std::setw(maxPrintLength) << std::left
+                           << it.node_type.substr(0, maxPrintLength - 3) + "..."
+                           << " ";
+                } else {
+                    stream << std::setw(maxPrintLength) << std::left << it.node_type << " ";
+                }
+
+                stream << std::setw(30) << std::left << "execType: " + std::string(it.exec_type) << " ";
+                stream << "realTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
+                       << it.real_time.count() / 1000.0 << " ";
+                stream << "cpuTime (ms): " << std::setw(10) << std::left << std::fixed << std::setprecision(3)
+                       << it.cpu_time.count() / 1000.0 << " ";
+
                 double opt_proportion = it.real_time.count() * 100.0 / totalTime.count();
                 std::stringstream opt_proportion_ss;
@@ -1310,29 +1226,14 @@ static UNUSED void printPerformanceCountsSimpleSort(std::vector<ov::ProfilingInf
                     opt_proportion_str = "N/A";
                 }
                 stream << std::setw(20) << std::left << "proportion: " + opt_proportion_str + "%";
 
                 stream << std::endl;
             }
         }
     }
-    stream << std::setw(25) << std::left << "Total time: " + std::to_string(totalTime.count() / 1000.0)
-           << " milliseconds" << std::endl;
+    stream << std::setw(25) << std::left << "Total time: " << totalTime.count() / 1000.0 << " milliseconds"
+           << std::endl;
     std::cout << std::endl;
     std::cout << "Full device name: " << deviceName << std::endl;
     std::cout << std::endl;
     std::cout.flags(fmt);
 }
 
-static UNUSED void printPerformanceCountsSort(std::vector<ov::ProfilingInfo> performanceData,
-                                              std::ostream& stream,
-                                              std::string deviceName,
-                                              std::string sorttype,
-                                              bool bshowHeader = true) {
-    if (sorttype == pcNoSort) {
-        printPerformanceCountsNoSort(performanceData, stream, deviceName, bshowHeader);
-    } else if (sorttype == pcSort) {
-        printPerformanceCountsDescendSort(performanceData, stream, deviceName, bshowHeader);
-    } else if (sorttype == pcSimpleSort) {
-        printPerformanceCountsSimpleSort(performanceData, stream, deviceName, bshowHeader);
-    }
-}
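The common.hpp hunks above fold printPerformanceCountsNoSort, printPerformanceCountsDescendSort and printPerformanceCountsSimpleSort into a single printPerformanceCountsSort driven by a sorttype string. A rough, standalone sketch of the resulting selection logic; the ProfilingInfo struct and the "sort"/"simple_sort" flag values are simplified stand-ins, not the benchmark app's actual definitions:

#include <algorithm>
#include <string>
#include <vector>

// Simplified stand-in for ov::ProfilingInfo.
struct ProfilingInfo {
    enum class Status { EXECUTED, NOT_RUN, OPTIMIZED_OUT } status;
    long long real_time_us;
};

// Assumed flag values; the real pcSort/pcSimpleSort constants live in the benchmark app.
static const std::string pcSort = "sort";
static const std::string pcSimpleSort = "simple_sort";

// Mirrors the merged behaviour: sort descending by real time for "sort" and
// "simple_sort", and for "simple_sort" keep only layers that actually executed.
std::vector<ProfilingInfo> select_counters(std::vector<ProfilingInfo> data, const std::string& sorttype) {
    if (sorttype == pcSort || sorttype == pcSimpleSort) {
        std::sort(data.begin(), data.end(), [](const ProfilingInfo& a, const ProfilingInfo& b) {
            return a.real_time_us > b.real_time_us;
        });
    }
    if (sorttype == pcSimpleSort) {
        data.erase(std::remove_if(data.begin(),
                                  data.end(),
                                  [](const ProfilingInfo& p) {
                                      return p.status != ProfilingInfo::Status::EXECUTED;
                                  }),
                   data.end());
    }
    return data;
}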
@@ -6,6 +6,7 @@
 
 #include <ctime>
 #include <fstream>
+#include <iosfwd>
 #include <iostream>
 #include <sstream>
 #include <string>
@@ -35,8 +36,9 @@ public:
      * @brief A constructor. Disables dumping in case dump file cannot be created
      * @param enabled - True if dumping is enabled by default.
      * @param name - name of file to dump to. File won't be created if first parameter is false.
+     * @param precision - floating point numbers' decimal places to print.
      */
-    explicit CsvDumper(bool enabled = true, const std::string& name = "") : canDump(enabled) {
+    explicit CsvDumper(bool enabled = true, const std::string& name = "", const int precision = 3) : canDump(enabled) {
         if (!canDump) {
             return;
         }
@@ -45,6 +47,8 @@ public:
         if (!file) {
             slog::warn << "Cannot create dump file! Disabling dump." << slog::endl;
             canDump = false;
+        } else {
+            setPrecision(precision);
         }
     }
 
@@ -57,6 +61,18 @@ public:
         delimiter = c;
     }
 
+    /**
+     * @brief Sets a precision used to print floating point values
+     * @param precision - Decimal places to print
+     * @return
+     */
+    void setPrecision(int precision) {
+        if (canDump) {
+            file.precision(precision);
+            file.setf(std::ios::fixed);
+        }
+    }
+
     /**
      * @brief Overloads operator to organize streaming values to file. Does nothing if dumping is
      * disabled Adds delimiter at the end of value provided
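The csv_dumper.hpp hunks above give CsvDumper an optional precision argument and a setPrecision helper that switches the dump file to fixed-point output. A hedged usage sketch with a plain std::ofstream standing in for the dumper's file member (the file name is hypothetical):

#include <fstream>
#include <ios>

int main() {
    // Stand-in for CsvDumper's internal stream; "example_report.csv" is not a path used by the benchmark app.
    std::ofstream file("example_report.csv");
    const int precision = 3;

    // Same effect as the new setPrecision(precision): every double streamed
    // afterwards is written as fixed-point with three decimal places.
    file.precision(precision);
    file.setf(std::ios::fixed);

    file << "realTime (ms)" << ";" << 1.23456 << "\n";  // writes: realTime (ms);1.235
    return 0;
}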
@@ -65,17 +65,17 @@ class StatisticsReport:
             f.write(self.csv_separator.join(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)\n']))
             for pi in prof_info:
                 f.write(self.csv_separator.join([pi.node_name, str(pi.status), pi.node_type, pi.exec_type,
-                                                 str((pi.real_time // timedelta(microseconds=1))/1000.0),
-                                                 str((pi.cpu_time // timedelta(microseconds=1))/1000.0)]))
+                                                 f"{pi.real_time / timedelta(milliseconds=1):.3f}",
+                                                 f"{pi.cpu_time / timedelta(milliseconds=1):.3f}"]))
                 f.write('\n')
                 total += pi.real_time
                 total_cpu += pi.cpu_time
-            f.write(self.csv_separator.join(['Total','','','',
-                                             str((total // timedelta(microseconds=1))/1000.0),
-                                             str((total_cpu // timedelta(microseconds=1))/1000.0)]))
+            f.write(self.csv_separator.join(['Total', '', '', '',
+                                             f"{total / timedelta(milliseconds=1):.3f}",
+                                             f"{total_cpu / timedelta(milliseconds=1):.3f}"]))
             f.write('\n\n')
 
-    def dump_performance_counters_sorted(self,prof_sorted_info):
+    def dump_performance_counters_sorted(self, prof_sorted_info):
         """Save sorted performance counters into csv file.
         """
         filename = os.path.join(self.config.report_folder, f'benchmark_sorted_report.csv')
@@ -83,16 +83,18 @@ class StatisticsReport:
         total_cpu = 0
         with open(filename, 'w') as f:
             writer = csv.writer(f)
-            writer.writerow(['LayerName', 'ExecStatus', 'LayerType', 'ExecType', 'RealTime (ms)', 'CPUTime (ms)' , 'Proportion(%) \n'])
+            writer.writerow(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)' , 'proportion (%)\n'])
             for tmp_prof in prof_sorted_info:
                 writer.writerow([tmp_prof[0], str(tmp_prof[1]),
                                  tmp_prof[2], tmp_prof[6],
-                                 str(tmp_prof[3]), str(tmp_prof[4]),
+                                 f"{tmp_prof[3] / 1000:.3f}", # Divide by 1000
+                                 f"{tmp_prof[4] / 1000:.3f}",
                                  str("%.2f"%(tmp_prof[5]*100))+"%"])
                 total += tmp_prof[3]
                 total_cpu += tmp_prof[4]
             f.write('\n')
-            writer.writerow(["Total time cost: RealTime %.2fms , CPUTime %.2fms"%(total,total_cpu)])
+            writer.writerow(["Total time: %.2f milliseconds"%(total / 1000)])
+            writer.writerow(["Total CPU time: %.2f milliseconds"%(total_cpu / 1000)])
             f.write('\n\n')
             logger.info(f'Sorted performance counters report is stored to {filename}')
 
@@ -74,11 +74,9 @@ def get_element_type(precision):
 
 def fuse_mean_scale(preproc: PrePostProcessor, app_inputs_info):
     # TODO: remove warning after 23.3 release
-    warned = False;
+    warned = False
     warn_msg = 'Mean/scale values are fused into the model. This slows down performance compared to --imean and --iscale which existed before'
     for input_info in app_inputs_info:
-        print(input_info)
-        print(input_info.mean)
         if input_info.mean.size:
             if not warned:
                 logger.warning(warn_msg)
@@ -399,51 +397,52 @@ def print_perf_counters_sort(perf_counts_list,sort_flag="sort"):
     total_detail_data = sorted(total_detail_data,key=lambda tmp_data:tmp_data[-4],reverse=True)
     total_detail_data = [tmp_data for tmp_data in total_detail_data if str(tmp_data[1])!="Status.NOT_RUN"]
     print_detail_result(total_detail_data)
-    print(f'Total time: {total_time} microseconds')
-    print(f'Total CPU time: {total_time_cpu} microseconds')
+    print(f'Total time: {total_time / 1000:.3f} milliseconds')
+    print(f'Total CPU time: {total_time_cpu / 1000:.3f} milliseconds')
     print(f'Total proportion: {"%.2f"%(round(total_real_time_proportion)*100)} % \n')
     return total_detail_data
 
 def print_detail_result(result_list):
     """ Print_perf_counters_sort result
     """
-    max_layer_name = 30
+    max_print_length = 20
     for tmp_result in result_list:
         node_name = tmp_result[0]
         layerStatus = tmp_result[1]
         layerType = tmp_result[2]
         real_time = tmp_result[3]
         cpu_time = tmp_result[4]
-        real_proportion = "%.2f"%(tmp_result[5]*100)
+        real_proportion = "%.2f" % (tmp_result[5] * 100)
         if real_proportion == "0.00":
             real_proportion = "N/A"
         execType = tmp_result[6]
-        print(f"{node_name[:max_layer_name - 4] + '...' if (len(node_name) >= max_layer_name) else node_name:<30}"
-              f"{str(layerStatus):<20}"
-              f"{'layerType: ' + layerType:<30}"
-              f"{'realTime: ' + str(real_time):<20}"
-              f"{'cpu: ' + str(cpu_time):<15}"
-              f"{'proportion: '+ str(real_proportion)+'%':<20}"
-              f"{'execType: ' + execType:<20}")
+        print(f"{node_name[:max_print_length - 4] + '...' if (len(node_name) >= max_print_length) else node_name:<20} "
+              f"{str(layerStatus):<20} "
+              f"layerType: {layerType[:max_print_length - 4] + '...' if (len(layerType) >= max_print_length) else layerType:<20} "
+              f"execType: {execType:<20} "
+              f"realTime (ms): {real_time / timedelta(milliseconds=1):<10.3f} "
+              f"cpuTime (ms): {cpu_time / timedelta(milliseconds=1):<10.3f}"
+              f"proportion: {str(real_proportion +'%'):<8}")
 
 def print_perf_counters(perf_counts_list):
-    max_layer_name = 30
+    max_print_length = 20
     for ni in range(len(perf_counts_list)):
         perf_counts = perf_counts_list[ni]
         total_time = timedelta()
         total_time_cpu = timedelta()
         logger.info(f"Performance counts for {ni}-th infer request")
         for pi in perf_counts:
-            print(f"{pi.node_name[:max_layer_name - 4] + '...' if (len(pi.node_name) >= max_layer_name) else pi.node_name:<30}"
-                  f"{str(pi.status):<15}"
-                  f"{'layerType: ' + pi.node_type:<30}"
-                  f"{'realTime: ' + str((pi.real_time // timedelta(microseconds=1)) / 1000.0):<20}"
-                  f"{'cpu: ' + str((pi.cpu_time // timedelta(microseconds=1)) / 1000.0):<20}"
-                  f"{'execType: ' + pi.exec_type:<20}")
+            print(f"{pi.node_name[:max_print_length - 4] + '...' if (len(pi.node_name) >= max_print_length) else pi.node_name:<20} "
+                  f"{str(pi.status):<20} "
+                  f"layerType: {pi.node_type[:max_print_length - 4] + '...' if (len(pi.node_type) >= max_print_length) else pi.node_type:<20} "
+                  f"execType: {pi.exec_type:<20} "
+                  f"realTime (ms): {pi.real_time / timedelta(milliseconds=1):<10.3f} "
+                  f"cpuTime (ms): {pi.cpu_time / timedelta(milliseconds=1):<10.3f}")
 
             total_time += pi.real_time
             total_time_cpu += pi.cpu_time
-        print(f'Total time: {(total_time // timedelta(microseconds=1)) / 1000.0} seconds')
-        print(f'Total CPU time: {(total_time_cpu // timedelta(microseconds=1)) / 1000.0} seconds\n')
+        print(f'Total time: {total_time / timedelta(milliseconds=1)} milliseconds')
+        print(f'Total CPU time: {total_time_cpu / timedelta(milliseconds=1)} milliseconds\n')
 
 
 def get_command_line_arguments(argv):