#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import re
import sys
from timeit import default_timer
from typing import Dict

import numpy as np
from openvino.preprocess import PrePostProcessor
from openvino.runtime import Core, InferRequest, Layout, Type, set_batch

from arg_parser import parse_args
from file_options import read_utterance_file, write_utterance_file
from utils import (GNA_ATOM_FREQUENCY, GNA_CORE_FREQUENCY,
                   compare_with_reference, get_scale_factor, log,
                   parse_outputs_from_args, parse_scale_factors,
                   set_scale_factors)


def infer_data(data: Dict[str, np.ndarray], infer_request: InferRequest, cw_l: int = 0, cw_r: int = 0) -> Dict[str, np.ndarray]:
    """Do a synchronous matrix inference."""
    frames_to_infer = {}
    result = {}

    batch_size = infer_request.get_input_tensor(0).shape[0]
    num_of_frames = next(iter(data.values())).shape[0]

    for output in infer_request.model_outputs:
        result[output.any_name] = np.ndarray((num_of_frames, np.prod(tuple(output.shape)[1:])))
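
    # Iterate over the utterance in steps of batch_size, starting cw_l frames
    # before the first frame and ending cw_r frames after the last one, so the
    # model sees the requested left/right context at the sequence boundaries
    # (boundary frames are repeated via the clamped `index` below)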
    for i in range(-cw_l, num_of_frames + cw_r, batch_size):
        if i < 0:
            index = 0
        elif i >= num_of_frames:
            index = num_of_frames - 1
        else:
            index = i

        for _input in infer_request.model_inputs:
            frames_to_infer[_input.any_name] = data[_input.any_name][index:index + batch_size]
            num_of_frames_to_infer = len(frames_to_infer[_input.any_name])

            # Add [batch_size - num_of_frames_to_infer] zero rows to the 2D numpy array,
            # used when inferring fewer frames than the batch size
            frames_to_infer[_input.any_name] = np.pad(
                frames_to_infer[_input.any_name],
                [(0, batch_size - num_of_frames_to_infer), (0, 0)],
            )

            frames_to_infer[_input.any_name] = frames_to_infer[_input.any_name].reshape(_input.tensor.shape)

        frame_results = infer_request.infer(frames_to_infer)

        # The output of this step lands at offset i - cw_r; skip iterations
        # that would write before the start of the result buffer
        if i - cw_r < 0:
            continue

        for output in frame_results.keys():
            vector_result = frame_results[output].reshape((batch_size, result[output.any_name].shape[1]))
            result[output.any_name][i - cw_r:i - cw_r + batch_size] = vector_result[:num_of_frames_to_infer]

    return result


def main():
    args = parse_args()

# --------------------------- Step 1. Initialize OpenVINO Runtime Core ------------------------------------------------
    log.info('Creating OpenVINO Runtime Core')
    core = Core()

# --------------------------- Step 2. Read a model --------------------------------------------------------------------
    if args.model:
        log.info(f'Reading the model: {args.model}')
        # (.xml and .bin files) or (.onnx file)
        model = core.read_model(args.model)

# --------------------------- Step 3. Apply preprocessing -------------------------------------------------------------
        if args.output_layers:
            output_names, output_ports = parse_outputs_from_args(args)
            model.add_outputs(list(zip(output_names, output_ports)))

        ppp = PrePostProcessor(model)

        for i in range(len(model.inputs)):
            ppp.input(i).tensor() \
                .set_element_type(Type.f32) \
                .set_layout(Layout('NC'))  # noqa: N400

            ppp.input(i).model().set_layout(Layout('NC'))

        for i in range(len(model.outputs)):
            ppp.output(i).tensor().set_element_type(Type.f32)

        model = ppp.build()
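
        # With a nonzero left/right context window, infer_data() feeds one
        # frame per inference and assembles the context itself, so the batch
        # dimension must stay at 1; otherwise the requested batch size applies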
        if args.context_window_left == args.context_window_right == 0:
            set_batch(model, args.batch_size)
        else:
            set_batch(model, 1)

# --------------------------- Step 4. Configure plugin ----------------------------------------------------------------
    devices = args.device.replace('HETERO:', '').split(',')
    plugin_config = {}

    if 'GNA' in args.device:
        gna_device_mode = devices[0] if '_' in devices[0] else 'GNA_AUTO'
        devices[0] = 'GNA'

        plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
        plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'
        plugin_config['GNA_EXEC_TARGET'] = args.exec_target
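
        # GNA runs integer arithmetic: input values are multiplied by a scale
        # factor during quantization, so the factor must match the dynamic
        # range of the input data (or of the model the blob was exported from)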
        # Set a GNA scale factor
        if args.import_gna_model:
            if args.scale_factor:
                log.warning(f'Custom scale factor will be used for imported GNA model: {args.import_gna_model}')
                set_scale_factors(plugin_config, parse_scale_factors(args))
            else:
                log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
        else:
            if args.scale_factor:
                set_scale_factors(plugin_config, parse_scale_factors(args))
            else:
                scale_factors = []

                for file_name in re.split(', |,', args.input):
                    first_utterance = next(iter(read_utterance_file(file_name).values()))
                    scale_factors.append(get_scale_factor(first_utterance))

                log.info('Using scale factor(s) calculated from first utterance')
                set_scale_factors(plugin_config, scale_factors)

        if args.export_embedded_gna_model:
            plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
            plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration

    if args.performance_counter:
        plugin_config['PERF_COUNT'] = 'YES'

    device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]

# --------------------------- Step 5. Loading model to the device -----------------------------------------------------
    log.info('Loading the model to the plugin')
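    # Either compile the model read in Step 2, or import a blob previously
    # produced by this sample's AOT export below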
    if args.model:
        compiled_model = core.compile_model(model, device_str, plugin_config)
    else:
        compiled_model = core.import_model(args.import_gna_model, device_str, plugin_config)

# --------------------------- Exporting GNA model using InferenceEngine AOT API ---------------------------------------
    if args.export_gna_model:
        log.info(f'Writing GNA Model to {args.export_gna_model}')
        compiled_model.export_model(args.export_gna_model)
        return 0

    if args.export_embedded_gna_model:
        log.info(f'Exported GNA embedded model to file {args.export_embedded_gna_model}')
        log.info(f'GNA embedded model export done for GNA generation {args.embedded_gna_configuration}')
        return 0

# --------------------------- Step 6. Set up input ---------------------------------------------------------------------
    if args.input_layers:
        input_names = re.split(', |,', args.input_layers)
    else:
        input_names = [_input.any_name for _input in compiled_model.inputs]

    if args.output_layers:
        output_names, output_ports = parse_outputs_from_args(args)
        # If the name of an output layer contains a port number, concatenate output_names and output_ports
        if ':' in compiled_model.outputs[0].any_name:
            output_names = [f'{output_names[i]}:{output_ports[i]}' for i in range(len(output_names))]
    else:
        output_names = [compiled_model.outputs[0].any_name]

    if args.input:
        input_files = re.split(', |,', args.input)

        if len(input_names) != len(input_files):
            log.error(f'Number of network inputs ({len(input_names)}) is not equal '
                      f'to number of ark files ({len(input_files)})')
            sys.exit(-3)

    if args.reference:
        reference_files = re.split(', |,', args.reference)

        if len(output_names) != len(reference_files):
            log.error('The number of reference files is not equal to the number of network outputs.')
            sys.exit(-5)

    if args.output:
        output_files = re.split(', |,', args.output)

        if len(output_names) != len(output_files):
            log.error('The number of output files is not equal to the number of network outputs.')
            sys.exit(-6)

    file_data = [read_utterance_file(file_name) for file_name in input_files]

    input_data = {
        utterance_name: {
            input_names[i]: file_data[i][utterance_name] for i in range(len(input_names))
        }
        for utterance_name in file_data[0].keys()
    }
    if args.reference:
        references = {output_names[i]: read_utterance_file(reference_files[i]) for i in range(len(output_names))}

# --------------------------- Step 7. Create infer request ------------------------------------------------------------
    infer_request = compiled_model.create_infer_request()

# --------------------------- Step 8. Do inference ---------------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    results = {name: {} for name in output_names}
    total_infer_time = 0

    for i, key in enumerate(sorted(input_data)):
        start_infer_time = default_timer()

        # Reset states between utterance inferences to remove a memory impact
        for state in infer_request.query_state():
            state.reset()

        result = infer_data(
            input_data[key],
            infer_request,
            args.context_window_left,
            args.context_window_right,
        )

        for name in output_names:
            results[name][key] = result[name]

        infer_time = default_timer() - start_infer_time
        total_infer_time += infer_time
        num_of_frames = file_data[0][key].shape[0]
        avg_infer_time_per_frame = infer_time / num_of_frames

# --------------------------- Step 9. Process output -------------------------------------------------------------------
        log.info('')
        log.info(f'Utterance {i} ({key}):')
        log.info(f'Total time in Infer (HW and SW): {infer_time * 1000:.2f}ms')
        log.info(f'Frames in utterance: {num_of_frames}')
        log.info(f'Average Infer time per frame: {avg_infer_time_per_frame * 1000:.2f}ms')

        for name in output_names:
            log.info('')
            log.info(f'Output blob name: {name}')
            log.info(f'Number scores per frame: {results[name][key].shape[1]}')

            if args.reference:
                log.info('')
                compare_with_reference(results[name][key], references[name][key])

        if args.performance_counter:
            if 'GNA' in args.device:
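                # On GNA the profiling counters report hardware tick counts
                # rather than wall-clock time; convert them to seconds using
                # the device clock frequency selected below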
                total_cycles = infer_request.profiling_info[0].real_time.total_seconds()
                stall_cycles = infer_request.profiling_info[1].real_time.total_seconds()
                active_cycles = total_cycles - stall_cycles
                frequency = 10**6

                if args.arch == 'CORE':
                    frequency *= GNA_CORE_FREQUENCY
                else:
                    frequency *= GNA_ATOM_FREQUENCY

                total_inference_time = total_cycles / frequency
                active_time = active_cycles / frequency
                stall_time = stall_cycles / frequency

                log.info('')
                log.info('Performance Statistics of GNA Hardware')
                log.info(f'   Total Inference Time: {(total_inference_time * 1000):.4f} ms')
                log.info(f'   Active Time: {(active_time * 1000):.4f} ms')
                log.info(f'   Stall Time: {(stall_time * 1000):.4f} ms')

    log.info('')
    log.info(f'Total sample time: {total_infer_time * 1000:.2f}ms')

    if args.output:
        for i, name in enumerate(results):
            write_utterance_file(output_files[i], results[name])
            log.info(f'File {output_files[i]} was created!')

# ----------------------------------------------------------------------------------------------------------------------
    log.info('This sample is an API example, '
             'for any performance measurements please use the dedicated benchmark_app tool\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())
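
# A typical invocation sketch (paths are illustrative; the exact flag names
# are defined in this sample's arg_parser module):
#   python speech_sample.py -m wsj_dnn5b.xml -i dev93_10.ark -o result.ark -d GNA_AUTO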