[PYTHON] Introduce Json Statistics Report aligned with C++ version (#15692)

* [PYTHON] Introduce Json Statistics Report aligned with C++ version * [PYTHON] Update README with new json_stats flag * [PYTHON] Fix missing StatisticsReportConfig compilation error * [PYTHON] Fix README formatting * [PYTHON] Fix indent, fix pcsort error thrown for timedelta/int type mismatch, fix some compilation errors * [PYTHON] Apply Pythonization ideas & fix JSON report showing incorrect category results * Update tools/benchmark_tool/openvino/tools/benchmark/utils/statistics_report.py Co-authored-by: Zlobin Vladimir <vladimir.zlobin@intel.com> * [PYTHON] Align multiple-iterations behavior for reports --------- Co-authored-by: Zlobin Vladimir <vladimir.zlobin@intel.com>
2023-03-13 10:58:40 +01:00
parent 1e757de195
commit 2eef025773
5 changed files with 215 additions and 48 deletions
--- a/tools/benchmark_tool/README.md
+++ b/tools/benchmark_tool/README.md
@@ -244,6 +244,9 @@ Statistics dumping options:
  -report_folder REPORT_FOLDER, --report_folder REPORT_FOLDER
                        Optional. Path to a folder where statistics report is stored.

+   -json_stats [JSON_STATS], --json_stats [JSON_STATS]
+                        Optional. Enables JSON-based statistics output (by default reporting system will use CSV format). Should be used together with -report_folder option.
+
  -pc [PERF_COUNTS], --perf_counts [PERF_COUNTS]
                        Optional. Report performance counters.

--- a/tools/benchmark_tool/openvino/tools/benchmark/main.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/main.py
@@ -19,7 +19,8 @@ from openvino.tools.benchmark.utils.utils import next_step, get_number_iteration
    get_command_line_arguments, parse_value_per_device, parse_devices, get_inputs_info, \
    print_inputs_and_outputs_info, get_network_batch_size, load_config, dump_config, get_latency_groups, \
    check_for_static, can_measure_as_static, parse_value_for_virtual_device
-from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, averageCntReport, detailedCntReport
+from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, JsonStatisticsReport, CsvStatisticsReport, \
+    averageCntReport, detailedCntReport

 def parse_and_check_command_line():
    def arg_not_empty(arg_value,empty_value):
@@ -60,8 +61,9 @@ def main():

        command_line_arguments = get_command_line_arguments(sys.argv)
        if args.report_type:
-          statistics = StatisticsReport(StatisticsReport.Config(args.report_type, args.report_folder))
-          statistics.add_parameters(StatisticsReport.Category.COMMAND_LINE_PARAMETERS, command_line_arguments)
+            _statistics_class = JsonStatisticsReport if args.json_stats else CsvStatisticsReport
+            statistics = _statistics_class(StatisticsReport.Config(args.report_type, args.report_folder))
+            statistics.add_parameters(StatisticsReport.Category.COMMAND_LINE_PARAMETERS, command_line_arguments)

        def is_flag_set_in_command_line(flag):
            return any(x.strip('-') == flag for x, y in command_line_arguments)
--- a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
@@ -163,6 +163,8 @@ def parse_args():
                           "counters and latency for each executed infer request.")
    stat.add_argument('-report_folder', '--report_folder', type=str, required=False, default='',
                      help="Optional. Path to a folder where statistics report is stored.")
+    args.add_argument('-json_stats', '--json_stats', type=str2bool, required=False, default=False, nargs='?', const=True,
+                      help="Optional. Enables JSON-based statistics output (by default reporting system will use CSV format). Should be used together with -report_folder option.")
    stat.add_argument('-pc', '--perf_counts', type=str2bool, required=False, default=False, nargs='?', const=True,
                      help='Optional. Report performance counters.', )
    stat.add_argument('-pcsort', '--perf_counts_sort', type=str, required=False, default="",
--- a/tools/benchmark_tool/openvino/tools/benchmark/utils/statistics_report.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/statistics_report.py
@@ -1,11 +1,14 @@
 # Copyright (C) 2018-2023 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

-from datetime import timedelta
 import os
-import sys
-from enum import Enum
+import abc
+import json
 import csv
+import numpy as np
+from enum import Enum
+from datetime import timedelta
+from typing import Dict, List, Tuple, Any
 from .logging import logger

 ## statistics reports types
@@ -14,7 +17,8 @@ averageCntReport = 'average_counters'
 detailedCntReport = 'detailed_counters'

 ## Responsible for collecting of statistics and dumping to .csv file
-class StatisticsReport:
+
+class StatisticsReport(metaclass = abc.ABCMeta):
    class Config():
        def __init__(self, report_type, report_folder):
            self.report_type = report_type
@@ -25,10 +29,9 @@ class StatisticsReport:
        RUNTIME_CONFIG = 1,
        EXECUTION_RESULTS = 2

-    def __init__(self, config):
+    def __init__(self, config) -> None:
        self.config = config
        self.parameters = {}
-        self.csv_separator = ';'

    def add_parameters(self, category, parameters):
        if category not in self.parameters.keys():
@@ -36,6 +39,25 @@ class StatisticsReport:
        else:
            self.parameters[category].extend(parameters)

+    @abc.abstractmethod
+    def dump(self):
+        pass
+
+    @abc.abstractclassmethod
+    def dump_performance_counters(self):
+        pass
+
+    @abc.abstractclassmethod
+    def dump_performance_counters_sorted(self):
+        pass
+
+@StatisticsReport.register
+class CsvStatisticsReport(StatisticsReport):
+
+    def __init__(self, config):
+        StatisticsReport.__init__(self, config)
+        self.csv_separator = ';'
+
    def dump(self):
        def dump_parameters(f, parameters):
            for k, v in parameters:
@@ -59,21 +81,59 @@ class StatisticsReport:

            logger.info(f"Statistics report is stored to {f.name}")

-    def dump_performance_counters_request(self, f, prof_info):
-        total = timedelta()
-        total_cpu = timedelta()
-        f.write(self.csv_separator.join(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)\n']))
-        for pi in prof_info:
-            f.write(self.csv_separator.join([pi.node_name, str(pi.status), pi.node_type, pi.exec_type, 
-                f"{pi.real_time / timedelta(milliseconds=1):.3f}", 
-                f"{pi.cpu_time / timedelta(milliseconds=1):.3f}"]))
-            f.write('\n')
-            total += pi.real_time
-            total_cpu += pi.cpu_time
-        f.write(self.csv_separator.join(['Total', '', '', '',
-            f"{total / timedelta(milliseconds=1):.3f}",
-            f"{total_cpu / timedelta(milliseconds=1):.3f}"]))
-        f.write('\n\n')
+    def dump_performance_counters(self, prof_info_list):
+        def dump_performance_counters_request(f, prof_info):
+            total, total_cpu = timedelta(), timedelta()
+
+            f.write(self.csv_separator.join(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)\n']))
+            for pi in prof_info:
+                f.write(self.csv_separator.join([pi.node_name, str(pi.status), pi.node_type, pi.exec_type,
+                    f"{pi.real_time / timedelta(milliseconds=1):.3f}",
+                    f"{pi.cpu_time / timedelta(milliseconds=1):.3f}"]))
+                f.write('\n')
+                total += pi.real_time
+                total_cpu += pi.cpu_time
+
+            f.write(self.csv_separator.join(['Total', '', '', '',
+                f"{total / timedelta(milliseconds=1):.3f}",
+                f"{total_cpu / timedelta(milliseconds=1):.3f}"]))
+            f.write('\n\n')
+
+        if self.config.report_type == '' or self.config.report_type == noCntReport:
+            logger.info("Statistics collecting for performance counters was not requested. No reports are dumped.")
+            return
+
+        if not prof_info_list:
+            logger.info('Performance counters are empty. No reports are dumped.')
+            return
+
+        filename = os.path.join(self.config.report_folder, f'benchmark_{self.config.report_type}_report.csv')
+        with open(filename, 'w') as f:
+            if self.config.report_type == detailedCntReport:
+                for prof_info in prof_info_list:
+                    dump_performance_counters_request(f, prof_info)
+            elif self.config.report_type == averageCntReport:
+                def get_average_performance_counters(prof_info_list):
+                    performance_counters_avg = []
+                    ## iterate over each processed infer request and handle its PM data
+                    for prof_info in prof_info_list:
+                        for pi in prof_info:
+                            item = next((x for x in performance_counters_avg if x.node_name == pi.node_name), None)
+                            if item:
+                                item.real_time += pi.real_time
+                                item.cpu_time += pi.cpu_time
+                            else:
+                                performance_counters_avg.append(pi)
+
+                    for pi in performance_counters_avg:
+                        pi.real_time /= len(prof_info_list)
+                        pi.cpu_time /= len(prof_info_list)
+                    return performance_counters_avg
+                dump_performance_counters_request(f, get_average_performance_counters(prof_info_list))
+            else:
+                raise Exception('PM data can only be collected for average or detailed report types')
+
+            logger.info(f'Performance counters report is stored to {filename}')

    def dump_performance_counters_sorted(self, prof_sorted_info):
        """Save sorted performance counters into csv file.
@@ -98,7 +158,77 @@ class StatisticsReport:
            f.write('\n\n')            
        logger.info(f'Sorted performance counters report is stored to {filename}')

-    def dump_performance_counters(self, prof_info_list):
+@StatisticsReport.register
+class JsonStatisticsReport(StatisticsReport):
+    def __init__(self, config) -> None:
+        StatisticsReport.__init__(self, config)
+
+    def dump(self):
+        def list_to_dict(parameters: List[Tuple[str, str]]) -> Dict[str, str]:
+            return {key: value for key, value in parameters}
+
+        filename = os.path.join(self.config.report_folder, 'benchmark_report.json')
+        with open(filename, 'w') as file:
+            json_statistics = {}
+            if self.Category.COMMAND_LINE_PARAMETERS in self.parameters.keys():
+                json_statistics["cmd_options"] = \
+                    list_to_dict(self.parameters[self.Category.COMMAND_LINE_PARAMETERS])
+
+            if self.Category.RUNTIME_CONFIG in self.parameters.keys():
+                json_statistics["configuration_setup"] = \
+                    list_to_dict(self.parameters[self.Category.RUNTIME_CONFIG])
+
+            if self.Category.EXECUTION_RESULTS in self.parameters.keys():
+                json_statistics["execution_results"] = \
+                    list_to_dict(self.parameters[self.Category.EXECUTION_RESULTS])
+
+            json.dump(json_statistics, file)
+            logger.info(f"Statistics report is stored to {file.name}")
+
+    def dump_performance_counters(self, prof_info_list: List[List[Any]]): #ProfilingInfo
+        def profiling_info_to_dict_list(prof_info_list):
+            
+            profiling_info_json_list = [0]*len(prof_info_list)
+            for i, profiling_info in enumerate(prof_info_list):
+
+                total, total_cpu = timedelta(), timedelta()
+                layers_info = [0] * len(profiling_info)
+                for l, layer in enumerate(profiling_info):
+                    layers_info[l] = {
+                        'name': layer.node_name,
+                        'node_type': layer.node_type,
+                        'status': str(layer.status),
+                        'real_time': f"{layer.real_time / timedelta(milliseconds=1):.3f}",
+                        'cpu_time': f"{layer.cpu_time / timedelta(milliseconds=1):.3f}",
+                        'exec_type': layer.exec_type
+                    }
+                    total += layer.real_time
+                    total_cpu += layer.cpu_time
+
+                profiling_info_json_list[i] = {
+                    'nodes': layers_info,
+                    'total_real_time': f"{total / timedelta(milliseconds=1):.3f}",
+                    'total_cpu_time': f"{total_cpu / timedelta(milliseconds=1):.3f}"
+                }
+
+            return profiling_info_json_list
+
+        def get_average_performance_counters(prof_info_list):
+            performance_counters_avg = []
+            for prof_info in prof_info_list:
+                for pi in prof_info:
+                    item = next((x for x in performance_counters_avg if x[0].node_name == pi.node_name), None)
+                    if item:
+                        item[0].real_time += pi.real_time
+                        item[0].cpu_time += pi.cpu_time
+                    else:
+                        performance_counters_avg.append([pi])
+            for pi in performance_counters_avg:
+                pi[0].real_time /= len(prof_info_list)
+                pi[0].cpu_time /= len(prof_info_list)
+
+            return performance_counters_avg
+
        if self.config.report_type == '' or self.config.report_type == noCntReport:
            logger.info("Statistics collecting for performance counters was not requested. No reports are dumped.")
            return
@@ -107,30 +237,60 @@ class StatisticsReport:
            logger.info('Performance counters are empty. No reports are dumped.')
            return

-        filename = os.path.join(self.config.report_folder, f'benchmark_{self.config.report_type}_report.csv')
-        with open(filename, 'w') as f:
+        filename = os.path.join(self.config.report_folder, f'benchmark_{self.config.report_type}_report.json')
+        with open(filename, 'w') as file:
            if self.config.report_type == detailedCntReport:
-                for prof_info in prof_info_list:
-                    self.dump_performance_counters_request(f, prof_info)
+                profiling_info_json =  profiling_info_to_dict_list(prof_info_list)
+                json_statistics = {
+                    'report_type': 'detailed',
+                    'detailed_performance': profiling_info_json
+                }
            elif self.config.report_type == averageCntReport:
-                def get_average_performance_counters(prof_info_list):
-                    performance_counters_avg = []
-                    ## iterate over each processed infer request and handle its PM data
-                    for prof_info in prof_info_list:
-                        for pi in prof_info:
-                            item = next((x for x in performance_counters_avg if x.node_name == pi.node_name), None)
-                            if item:
-                                item.real_time += pi.real_time
-                                item.cpu_time += pi.cpu_time
-                            else:
-                                performance_counters_avg.append(pi)
-
-                    for pi in performance_counters_avg:
-                        pi.real_time /= len(prof_info_list)
-                        pi.cpu_time /= len(prof_info_list)
-                    return performance_counters_avg
-                self.dump_performance_counters_request(f, get_average_performance_counters(prof_info_list))
+                prof_info_list_avg = get_average_performance_counters(prof_info_list)
+                profiling_info_json = profiling_info_to_dict_list(prof_info_list_avg)
+                json_statistics = {
+                    'report_type': 'average',
+                    'avg_performance': profiling_info_json[0]
+                }
            else:
                raise Exception('PM data can only be collected for average or detailed report types')

+            json.dump(json_statistics, file, indent=4)
            logger.info(f'Performance counters report is stored to {filename}')
+
+    def dump_performance_counters_sorted(self, prof_sorted_info) -> None:
+        def profiling_info_to_dict_list(prof_info_matrix: np.ndarray) -> List[Dict[str, str]]:
+            total, total_cpu = 0, 0
+
+            nodes_info_list = [0]*len(prof_info_matrix)
+            for i, info in enumerate(prof_info_matrix):
+                nodes_info_list[i] = {
+                    'name': info[0],
+                    'node_type': info[2],
+                    'status': str(info[1]),
+                    'real_time': f"{info[3] / 1000:.3f}",
+                    'cpu_time': f"{info[4] / 1000:.3f}",
+                    'exec_type': info[6],
+                    '%': str("%.2f"%(info[5] * 100))+"%"
+                }
+
+                total += info[3]
+                total_cpu += info[4]
+
+            prof_info_json = {
+                'nodes': nodes_info_list,
+                'total_real_time': f"{total / 1000:.3f}",
+                'total_cpu_time': f"{total_cpu / 1000:.3f}"
+            }
+
+            return prof_info_json
+
+        filename = os.path.join(self.config.report_folder, f'benchmark_sorted_report.json')
+        with open(filename, 'w') as file:
+            profiling_info_json = profiling_info_to_dict_list(prof_sorted_info)
+            json_statistics = {
+                'report_type': 'sorted',
+                'avg_performance': profiling_info_json
+            }
+            json.dump(json_statistics, file, indent=4)
+            logger.info(f'Sorted performance counters report is stored to {filename}')
--- a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py
@@ -426,8 +426,8 @@ def print_detail_result(result_list):
                f"{str(layerStatus):<20} "
                f"layerType: {layerType[:max_print_length - 4] + '...' if (len(layerType) >= max_print_length) else layerType:<20} "
                f"execType: {execType:<20} "
-                f"realTime (ms): {real_time / timedelta(milliseconds=1):<10.3f} "
-                f"cpuTime (ms): {cpu_time / timedelta(milliseconds=1):<10.3f}"
+                f"realTime (ms): {real_time / 1000:<10.3f} "
+                f"cpuTime (ms): {cpu_time / 1000:<10.3f}"
                f"proportion: {str(real_proportion +'%'):<8}")

 def print_perf_counters(perf_counts_list):