Files
openvino/inference-engine/samples/benchmark_app/statistics_report.cpp

225 lines
9.4 KiB
C++
Raw Normal View History

2019-04-12 18:25:53 +03:00
// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include <vector>
#include <utility>
#include <map>
#include <algorithm>
#include "statistics_report.hpp"
2019-08-09 19:02:42 +03:00
/**
 * Stores one set of per-layer performance counters for later reporting.
 *
 * The counters are kept only when the configured report type is
 * averageCntReport or detailedCntReport; for any other report type the
 * call is a no-op.
 *
 * @param pmStat performance counters keyed by layer name
 */
void StatisticsReport::addPerfCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &pmStat) {
    const bool countersRequested =
        (_config.report_type == averageCntReport) || (_config.report_type == detailedCntReport);
    if (!countersRequested) {
        // per-layer data is not needed for this report type
        return;
    }
    _performanceCounters.push_back(pmStat);
}
2019-08-09 19:02:42 +03:00
/**
 * Appends the given latency samples, in order, to the latencies already
 * accumulated by this report.
 *
 * @param latencies samples to append
 */
void StatisticsReport::addLatencies(const std::vector<double> &latencies) {
    for (const double sample : latencies) {
        _latencies.push_back(sample);
    }
}
void StatisticsReport::dump(const double &fps, const size_t &iteration_number, const double &totalExecTime) {
2019-04-12 18:25:53 +03:00
if (_config.report_type.empty()) {
slog::info << "Statistics collecting was not requested. No reports are dumped." << slog::endl;
return;
}
std::string separator =
#if defined _WIN32 || defined __CYGWIN__
# if defined UNICODE
L"\\";
# else
"\\";
# endif
#else
"/";
#endif
if (_config.report_folder.empty())
separator = "";
CsvDumper dumper(true, _config.report_folder + separator + "benchmark_" + _config.report_type + "_report.csv");
// resulting number of columns in csv file depends on the report_type. If it's noCntReport, then
// no PM data is collected and there are only 3 columns in the file (in configuration section). If it's
2019-08-09 19:02:42 +03:00
// averageCntReport then median PM values are collected per each layer and the number of columns is 6.
2019-04-12 18:25:53 +03:00
// Example from GPU:
//
// layer name;exec status;layer type;exec type;real time;cpu time;
// conv1;EXECUTED;Convolution;convolution_gpu_bfyx_gemm_like;615;3;
// Here, all the data are taken from InferenceEngine::InferenceEngineProfileInfo.
//
2019-08-09 19:02:42 +03:00
// In case of detailedCntReport the number of columns is 4 + _config.nireq * 2, because first 4 parameters
2019-04-12 18:25:53 +03:00
// are the same but realTime and cpuTime can be different on each iteration (example from 5 GPU requests):
// conv1;EXECUTED;Convolution;convolution_gpu_bfyx_gemm_like;630,3;617,3;616,3;615,3;617,3;
size_t numOfColumns = 0;
if (_config.report_type == noCntReport) {
numOfColumns = 3;
2019-08-09 19:02:42 +03:00
} else if (_config.report_type == averageCntReport) {
2019-04-12 18:25:53 +03:00
numOfColumns = 6;
} else {
// for detailedCntReport
2019-08-09 19:02:42 +03:00
numOfColumns = 4 + _config.nireq * 2;
2019-04-12 18:25:53 +03:00
}
auto completeCsvRow = [](CsvDumper &dumper, size_t numOfColumns, size_t filled) {
for (size_t i = 0; i < numOfColumns - filled; i++)
dumper << "";
dumper.endLine();
};
// dump execution configuration
dumper << "Configuration setup";
completeCsvRow(dumper, numOfColumns, 1);
dumper << "config option" << "CLI parameter" << "value";
completeCsvRow(dumper, numOfColumns, 3);
dumper << "target device" << " -d" << _config.device;
completeCsvRow(dumper, numOfColumns, 3);
dumper << "execution mode" << " -api" << _config.api;
completeCsvRow(dumper, numOfColumns, 3);
dumper << "batch size" << " -b" << _config.batch;
completeCsvRow(dumper, numOfColumns, 3);
dumper << "number of iterations" << " -niter" << _config.niter;
completeCsvRow(dumper, numOfColumns, 3);
dumper << "number of parallel infer requests" << " -nireq" << _config.nireq;
completeCsvRow(dumper, numOfColumns, 3);
2019-08-09 19:02:42 +03:00
dumper << "duration in ms" << " -t" << _config.duration;
completeCsvRow(dumper, numOfColumns, 3);
2019-04-12 18:25:53 +03:00
dumper << "number of CPU threads" << " -nthreads" << _config.cpu_nthreads;
completeCsvRow(dumper, numOfColumns, 3);
2019-08-09 19:02:42 +03:00
for (auto& item : _config.nstreams)
dumper << "number of " << item.first << " streams" << " -nstreams" << item.second;
completeCsvRow(dumper, numOfColumns, 3);
2019-04-12 18:25:53 +03:00
dumper << "CPU pinning enabled" << " -pin" << _config.cpu_pin;
completeCsvRow(dumper, numOfColumns, 3);
dumper.endLine();
// write PM data from each iteration
if (!_performanceCounters.empty()) {
2019-08-09 19:02:42 +03:00
if (_config.report_type != averageCntReport && _config.report_type != detailedCntReport) {
throw std::logic_error("PM data can only be collected for average or detailed report types");
2019-04-12 18:25:53 +03:00
}
// this vector is sorted according to network layers execution order.
auto performanceMapSorted = preparePmStatistics();
dumper << "Performance counters";
completeCsvRow(dumper, numOfColumns, 1);
dumper << "layer name" << "exec status" << "layer type" << "exec type";
2019-08-09 19:02:42 +03:00
if (_config.report_type == averageCntReport) {
dumper << "average real time" << "average cpu time";
2019-04-12 18:25:53 +03:00
completeCsvRow(dumper, numOfColumns, 6);
} else {
// detailedCntReport case
for (size_t i = 0; i< _performanceCounters.size(); i++) {
2019-08-09 19:02:42 +03:00
dumper << "realTime_req" + std::to_string(i) << "cpuTime_req" + std::to_string(i);
2019-04-12 18:25:53 +03:00
}
completeCsvRow(dumper, numOfColumns, 4 + _performanceCounters.size() * 2);
}
for (const auto &layer : performanceMapSorted) {
dumper << layer.first; // layer name
2019-08-09 19:02:42 +03:00
2019-04-12 18:25:53 +03:00
switch (layer.second.status) {
case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
dumper << "EXECUTED";
break;
case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
dumper << "NOT_RUN";
break;
case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
dumper << "OPTIMIZED_OUT";
break;
}
dumper << layer.second.layer_type << layer.second.exec_type;
2019-08-09 19:02:42 +03:00
if (_config.report_type == averageCntReport) {
// write average realTime and cpuTime from each processed request for current layer
2019-04-12 18:25:53 +03:00
dumper <<
2019-08-09 19:02:42 +03:00
std::to_string(std::accumulate(_perLayerRealTime[layer.first].begin(),
_perLayerRealTime[layer.first].end(), 0.0) / _perLayerRealTime[layer.first].size() / 1000.0) <<
std::to_string(std::accumulate(_perLayerCpuTime[layer.first].begin(),
_perLayerCpuTime[layer.first].end(), 0.0) / _perLayerCpuTime[layer.first].size() / 1000.0);
2019-04-12 18:25:53 +03:00
} else {
// write all realTime and cpuTime from each processed request for current layer
2019-08-09 19:02:42 +03:00
for (size_t i = 0; i < _config.nireq; i++) {
2019-04-12 18:25:53 +03:00
dumper << std::to_string(_perLayerRealTime[layer.first][i] / 1000.0) << std::to_string(_perLayerCpuTime[layer.first][i] / 1000.0);
}
}
dumper.endLine();
}
dumper.endLine();
}
if (_config.report_type == detailedCntReport) {
dumper << "Statistics";
completeCsvRow(dumper, numOfColumns, 1);
dumper << "metric";
2019-08-09 19:02:42 +03:00
for (size_t i = 0; i < _totalLayersTime.size(); i++) {
2019-04-12 18:25:53 +03:00
// detailedCntReport case
2019-08-09 19:02:42 +03:00
dumper << "req" + std::to_string(i);
2019-04-12 18:25:53 +03:00
}
2019-08-09 19:02:42 +03:00
completeCsvRow(dumper, numOfColumns, 4 + _totalLayersTime.size());
2019-04-12 18:25:53 +03:00
dumper << "latencies";
2019-08-09 19:02:42 +03:00
for (const auto &lat : _totalLayersTime) {
dumper << lat / 1000.0;
2019-04-12 18:25:53 +03:00
}
2019-08-09 19:02:42 +03:00
completeCsvRow(dumper, numOfColumns, _totalLayersTime.size());
2019-04-12 18:25:53 +03:00
dumper.endLine();
}
dumper << "Execution results";
completeCsvRow(dumper, numOfColumns, 1);
2019-08-09 19:02:42 +03:00
dumper << "number of iterations" << iteration_number;
2019-04-12 18:25:53 +03:00
completeCsvRow(dumper, numOfColumns, 2);
dumper << "latency" << getMedianValue<double>(_latencies);
completeCsvRow(dumper, numOfColumns, 2);
dumper << "throughput" << fps;
completeCsvRow(dumper, numOfColumns, 2);
dumper << "total execution time" << totalExecTime;
completeCsvRow(dumper, numOfColumns, 2);
slog::info << "statistics report is stored to " << dumper.getFilename() << slog::endl;
}
/**
 * @return the median of all latency samples accumulated so far,
 *         as computed by getMedianValue()
 */
double StatisticsReport::getMedianLatency() {
    const double medianLatency = getMedianValue<double>(_latencies);
    return medianLatency;
}
/**
 * Builds the per-layer timing tables from the collected performance counters.
 *
 * The layer order is taken from perfCountersSorted() applied to the first
 * stored set of counters. For every stored set, each layer's realTime_uSec and
 * cpu_uSec are appended to _perLayerRealTime / _perLayerCpuTime, and the sum of
 * the real times over all layers is appended to _totalLayersTime.
 *
 * A layer that is missing from a particular set contributes 0 for both values,
 * so every per-layer vector keeps one entry per stored set.
 *
 * @return the layers of the first stored set, ordered by perfCountersSorted()
 * @throws std::logic_error if no performance counters were collected
 */
std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>> StatisticsReport::preparePmStatistics() {
    if (_performanceCounters.empty()) {
        throw std::logic_error("preparePmStatistics() was called when no PM data was collected");
    }

    // sort PM data of first processed request according to layers execution order
    auto performanceMapSorted = perfCountersSorted(_performanceCounters[0]);

    // iterate over each processed infer request and handle its PM data
    for (const auto &pm : _performanceCounters) {
        long long total = 0L;
        // iterate over each layer from sorted vector and add required PM data to the per-layer maps
        for (const auto &it : performanceMapSorted) {
            // A single find() replaces three operator[] lookups and, unlike operator[],
            // does not insert a default entry into the stored counters for a missing layer.
            long long realTime = 0;
            long long cpuTime = 0;
            const auto found = pm.find(it.first);
            if (found != pm.end()) {
                realTime = found->second.realTime_uSec;
                cpuTime = found->second.cpu_uSec;
            }
            _perLayerRealTime[it.first].push_back(realTime);
            _perLayerCpuTime[it.first].push_back(cpuTime);
            total += realTime;
        }
        _totalLayersTime.push_back(total);
    }
    return performanceMapSorted;
}
/**
 * Computes the median of the given values without modifying the input.
 *
 * For an odd number of elements the middle element of the sorted sequence is
 * returned; for an even number, the mean of the two middle elements.
 *
 * @param vec values to take the median of (may be empty)
 * @return the median, or a value-initialized T (0 for arithmetic types) when
 *         vec is empty
 */
template <typename T>
T StatisticsReport::getMedianValue(const std::vector<T> &vec) {
    // An empty input has no middle element; indexing it would be undefined behaviour.
    if (vec.empty()) {
        return T();
    }

    std::vector<T> sortedVec(vec);
    std::sort(sortedVec.begin(), sortedVec.end());

    const size_t middle = sortedVec.size() / 2;
    if (sortedVec.size() % 2 != 0) {
        return sortedVec[middle];
    }
    return (sortedVec[middle] + sortedVec[middle - 1]) / static_cast<T>(2.0);
}