[tools] some fixes for python benchmark (#9584)

* rename inference_engine to OpenVINO

* correct exception for batch

* check all inputs to find batch dimension before throwing exception

* correct warning about batch

* avoid set_shape in static case

* refactoring latency output

* message about benchmarking mode

* use new precision naming

* use pass manager instead of offline_transformations
This commit is contained in:
Alexey Lebedev 2022-01-12 17:22:58 +03:00 committed by GitHub
parent 9cf3359958
commit 0a85417896
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 41 additions and 47 deletions

View File

@ -40,7 +40,7 @@ class Benchmark:
logger.info(f'CPU extensions is loaded {path_to_extension}')
def get_version_info(self) -> str:
logger.info(f"InferenceEngine:\n{'': <9}{'API version':.<24} {get_version()}")
logger.info(f"OpenVINO:\n{'': <9}{'API version':.<24} {get_version()}")
version_string = 'Device info\n'
for device, version in self.core.get_versions(self.device).items():
version_string += f"{'': <9}{device}\n"

View File

@ -60,7 +60,7 @@ def run(args):
is_network_compiled = True
print("Model is compiled")
# ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
# ------------------------------ 2. Loading OpenVINO ---------------------------------------------------
next_step(step_id=2)
benchmark = Benchmark(args.target_device, args.number_infer_requests,
@ -371,6 +371,11 @@ def run(args):
elif benchmark.inference_only and not allow_inference_only_or_sync:
raise Exception("Benchmarking dynamic model available with input filling in measurement loop only!")
if benchmark.inference_only:
logger.info("Benchmarking in inference only mode (inputs filling are not included in measurement loop).")
else:
logger.info("Benchmarking in full mode (inputs filling are included in measurement loop).")
# update batch size in case dynamic network with one data_shape
if benchmark.inference_only and batch_size.is_dynamic:
batch_size = Dimension(data_queue.batch_sizes[data_queue.current_group_id])
@ -390,7 +395,8 @@ def run(args):
data_tensors = data_queue.get_next_input()
for port, data_tensor in data_tensors.items():
input_tensor = request.get_input_tensor(port)
input_tensor.shape = data_tensor.shape
if not static_mode:
input_tensor.shape = data_tensor.shape
input_tensor.data[:] = data_tensor.data
if statistics:
@ -443,7 +449,7 @@ def run(args):
if args.dump_config:
dump_config(args.dump_config, config)
logger.info(f"Inference Engine configuration settings were dumped to {args.dump_config}")
logger.info(f"OpenVINO configuration settings were dumped to {args.dump_config}")
if args.exec_graph_path:
dump_exec_graph(compiled_model, args.exec_graph_path)
@ -512,25 +518,25 @@ def run(args):
statistics.dump()
print(f'Count: {iteration} iterations')
print(f'Duration: {get_duration_in_milliseconds(total_duration_sec):.2f} ms')
print(f'Count: {iteration} iterations')
print(f'Duration: {get_duration_in_milliseconds(total_duration_sec):.2f} ms')
if MULTI_DEVICE_NAME not in device_name:
print('Latency:')
if args.latency_percentile == 50 and static_mode:
print(f'Median: {median_latency_ms:.2f} ms')
print(f' Median: {median_latency_ms:.2f} ms')
elif args.latency_percentile != 50:
print(f'({args.latency_percentile} percentile): {median_latency_ms:.2f} ms')
print(f'AVG: {avg_latency_ms:.2f} ms')
print(f'MIN: {min_latency_ms:.2f} ms')
print(f'MAX: {max_latency_ms:.2f} ms')
print(f' AVG: {avg_latency_ms:.2f} ms')
print(f' MIN: {min_latency_ms:.2f} ms')
print(f' MAX: {max_latency_ms:.2f} ms')
if pcseq:
print("Latency for each data shape group: ")
for group in benchmark.latency_groups:
print(f"{str(group)}")
print(f'AVG: {group.avg:.2f} ms')
print(f'MIN: {group.min:.2f} ms')
print(f'MAX: {group.max:.2f} ms')
print(f" {str(group)}")
print(f' AVG: {group.avg:.2f} ms')
print(f' MIN: {group.min:.2f} ms')
print(f' MAX: {group.max:.2f} ms')
print(f'Throughput: {fps:.2f} FPS')

View File

@ -138,12 +138,12 @@ def parse_args():
" Please note, command line parameters have higher priority then parameters from configuration file.")
args.add_argument('-qb', '--quantization_bits', type=int, required=False, default=None, choices=[8, 16],
help="Optional. Weight bits for quantization: 8 (I8) or 16 (I16) ")
args.add_argument('-ip', '--input_precision', type=str, required=False, choices=['U8', 'FP16', 'FP32'],
args.add_argument('-ip', '--input_precision', type=str, required=False, choices=['u8', 'U8', 'f16','FP16', 'f32','FP32'],
help='Optional. Specifies precision for all input layers of the network.')
args.add_argument('-op', '--output_precision', type=str, required=False, choices=['U8', 'FP16', 'FP32'],
args.add_argument('-op', '--output_precision', type=str, required=False, choices=['u8', 'U8', 'f16','FP16', 'f32','FP32'],
help='Optional. Specifies precision for all output layers of the network.')
args.add_argument('-iop', '--input_output_precision', type=str, required=False,
help='Optional. Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required. Overwrites precision from ip and op options for specified layers.')
help='Optional. Specifies precision for input and output layers by name. Example: -iop "input:f16, output:f16". Notice that quotes are required. Overwrites precision from ip and op options for specified layers.')
args.add_argument('-cdir', '--cache_dir', type=str, required=False, default='',
help="Optional. Enable model caching to specified directory")
args.add_argument('-lfile', '--load_from_file', required=False, nargs='?', default=argparse.SUPPRESS,

View File

@ -3,10 +3,9 @@
from collections import defaultdict
import datetime
from openvino.runtime import Core, Model, PartialShape, Dimension, Layout
from openvino.runtime import Type
from openvino.runtime import Core, Model, PartialShape, Dimension, Layout, Type
from openvino.preprocess import PrePostProcessor
from openvino.offline_transformations_pybind import serialize
from openvino.runtime.passes import Manager
from .constants import DEVICE_DURATION_IN_SECS, UNKNOWN_DEVICE_TYPE, \
CPU_DEVICE_NAME, GPU_DEVICE_NAME
@ -29,7 +28,7 @@ def static_vars(**kwargs):
def next_step(additional_info='', step_id=0):
step_names = {
1: "Parsing and validating input arguments",
2: "Loading Inference Engine",
2: "Loading OpenVINO",
3: "Setting device configuration",
4: "Reading network files",
5: "Resizing network to match image sizes and given batch",
@ -68,7 +67,10 @@ def get_element_type(precision):
}
if precision in format_map.keys():
return format_map[precision]
raise Exception("Can't find openvino element type for precision: " + precision)
for element_type in format_map.values():
if element_type.get_type_name() == precision:
return element_type
raise Exception(f"Undefined precision: '{precision}' !")
def pre_post_processing(model: Model, app_inputs_info, input_precision: str, output_precision: str, input_output_precision: str):
@ -128,34 +130,17 @@ def _parse_arg_map(arg_map: str):
return parsed_map
def get_precision(element_type: Type):
format_map = {
'f32' : 'FP32',
'i32' : 'I32',
'i64' : 'I64',
'f16' : 'FP16',
'i16' : 'I16',
'u16' : 'U16',
'i8' : 'I8',
'u8' : 'U8',
'boolean' : 'BOOL',
}
if element_type.get_type_name() in format_map.keys():
return format_map[element_type.get_type_name()]
raise Exception("Can't find precision for openvino element type: " + str(element_type))
def print_inputs_and_outputs_info(model: Model):
inputs = model.inputs
input_names = get_input_output_names(inputs)
for i in range(len(inputs)):
logger.info(f"Model input '{input_names[i]}' precision {get_precision(inputs[i].element_type)}, "
logger.info(f"Model input '{input_names[i]}' precision {inputs[i].element_type.get_type_name()}, "
f"dimensions ({str(inputs[i].node.layout)}): "
f"{' '.join(str(x) for x in inputs[i].partial_shape)}")
outputs = model.outputs
output_names = get_input_output_names(outputs)
for i in range(len(outputs)):
logger.info(f"Model output '{output_names[i]}' precision {get_precision(outputs[i].element_type)}, "
logger.info(f"Model output '{output_names[i]}' precision {outputs[i].element_type.get_type_name()}, "
f"dimensions ({str(outputs[i].node.layout)}): "
f"{' '.join(str(x) for x in outputs[i].partial_shape)}")
@ -307,10 +292,11 @@ def process_help_inference_string(benchmark_app, device_number_streams):
return output_string
def dump_exec_graph(compiled_model, model_path, weight_path = None):
if not weight_path:
weight_path = model_path[:model_path.find(".xml")] + ".bin"
serialize(compiled_model.get_runtime_model(), model_path, weight_path)
def dump_exec_graph(compiled_model, model_path):
weight_path = model_path[:model_path.find(".xml")] + ".bin"
pass_manager = Manager()
pass_manager.register_pass("Serialize", model_path, weight_path)
pass_manager.run_passes(compiled_model.get_runtime_model())
@ -547,6 +533,7 @@ def get_inputs_info(shape_string, data_shape_string, layout_string, batch_size,
layout_map = parse_input_parameters(layout_string, input_names)
batch_size = parse_batch_size(batch_size)
reshape = False
batch_found = False
input_info = []
for i in range(len(inputs)):
info = AppInputInfo()
@ -593,14 +580,15 @@ def get_inputs_info(shape_string, data_shape_string, layout_string, batch_size,
elif info.layout == Layout():
supposed_batch = info.partial_shape[0]
if supposed_batch.is_dynamic or supposed_batch in [0, 1]:
logger.warning(f"Batch dimension is not specified in layout. "
logger.warning(f"Batch dimension is not specified for input '{info.name}'. "
"The first dimension will be interpreted as batch size.")
batch_index = 0
info.layout = Layout("N...")
if batch_index != -1 and info.partial_shape[batch_index] != batch_size:
info.partial_shape[batch_index] = batch_size
reshape = True
elif batch_index == -1:
batch_found = True
elif batch_index == -1 and not batch_found and i == len(inputs) - 1:
raise Exception(f"Batch dimension is not specified for this model!")
# Data shape