[IE Python Speech Sample] Add --scale_factor and --performance_counter options (#6663)

* Adds perf counters, and scale factor args

* Adding defined choices for arch type for -a/--arch option

* changing print to logger, frequencies are now global consts

* change to log info formatting

* Fix style issues

* doc: Update inference-engine/ie_bridges/python/sample/speech_sample/README.md

* doc: Update inference-engine/ie_bridges/python/sample/speech_sample/README.md

* doc: Update inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py

* doc: Update inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py

Co-authored-by: Koyanagi, Ken <ken.koyanagi@intel.com>
Co-authored-by: Kate Generalova <kate.generalova@intel.com>
This commit is contained in:
Dmitry Pigasin
2021-09-03 23:23:00 +03:00
committed by GitHub
parent 005e7da325
commit bb84d11313
3 changed files with 53 additions and 5 deletions

View File

@@ -80,7 +80,8 @@ Usage message:
usage: speech_sample.py [-h] (-m MODEL | -rg IMPORT_GNA_MODEL) -i INPUT
[-o OUTPUT] [-r REFERENCE] [-d DEVICE]
[-bs BATCH_SIZE] [-qb QUANTIZATION_BITS]
[-wg EXPORT_GNA_MODEL] [-iname INPUT_LAYERS]
[-sf SCALE_FACTOR] [-wg EXPORT_GNA_MODEL] [-pc]
[-a {CORE,ATOM}] [-iname INPUT_LAYERS]
[-oname OUTPUT_LAYERS]
optional arguments:
@@ -94,9 +95,10 @@ optional arguments:
Options:
-h, --help Show this help message and exit.
-i INPUT, --input INPUT
Required. Path to an input file (.ark or .npz).
Required. Path to an input file (.ark or .npz).
-o OUTPUT, --output OUTPUT
Optional. Output file name to save inference results (.ark or .npz).
Optional. Output file name to save inference results
(.ark or .npz).
-r REFERENCE, --reference REFERENCE
Optional. Read reference score file and compare
scores.
@@ -113,9 +115,18 @@ Options:
-qb QUANTIZATION_BITS, --quantization_bits QUANTIZATION_BITS
Optional. Weight bits for quantization: 8 or 16
(default 16).
-sf SCALE_FACTOR, --scale_factor SCALE_FACTOR
Optional. The user-specified input scale factor for
quantization.
-wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL
Optional. Write GNA model to file using path/filename
provided.
-pc, --performance_counter
Optional. Enables performance report (specify -a to
ensure arch accurate results).
-a {CORE,ATOM}, --arch {CORE,ATOM}
Optional. Specify architecture. CORE, ATOM with the
combination of -pc.
-iname INPUT_LAYERS, --input_layers INPUT_LAYERS
Optional. Layer names for input blobs. The names are
separated with ",". Allows to change the order of

View File

@@ -28,10 +28,16 @@ def parse_args() -> argparse.Namespace:
args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).')
args.add_argument('-qb', '--quantization_bits', default=16, type=int,
help='Optional. Weight bits for quantization: 8 or 16 (default 16).')
args.add_argument('-sf', '--scale_factor', type=float,
help='Optional. The user-specified input scale factor for quantization.')
args.add_argument('-wg', '--export_gna_model', type=str,
help='Optional. Write GNA model to file using path/filename provided.')
args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS)
args.add_argument('-we_gen', '--embedded_gna_configuration', default='GNA1', type=str, help=argparse.SUPPRESS)
args.add_argument('-pc', '--performance_counter', action='store_true',
help='Optional. Enables performance report (specify -a to ensure arch accurate results).')
args.add_argument('-a', '--arch', default='CORE', type=str.upper, choices=['CORE', 'ATOM'],
help='Optional. Specify architecture. CORE, ATOM with the combination of -pc.')
args.add_argument('-iname', '--input_layers', type=str,
help='Optional. Layer names for input blobs. The names are separated with ",". '
'Allows to change the order of input layers for -i flag. Example: Input1,Input2')

View File

@@ -14,6 +14,10 @@ from arg_parser import parse_args
from file_options import read_utterance_file, write_utterance_file
from openvino.inference_engine import ExecutableNetwork, IECore, IENetwork
# Operating frequencies, in MHz, of the GNA hardware on Core and Atom architectures.
# main() multiplies the selected value by 10**6 and divides the hardware cycle
# counters by it to report the performance statistics as time.
GNA_CORE_FREQUENCY = 400
GNA_ATOM_FREQUENCY = 200
def get_scale_factor(matrix: np.ndarray) -> float:
"""Get scale factor for quantization using utterance matrix"""
@@ -143,21 +147,26 @@ def main():
plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'
# Get a GNA scale factor
# Set a GNA scale factor
if args.import_gna_model:
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
elif args.scale_factor:
log.info(f'Using scale factor of {args.scale_factor:.7f} specified by user.')
plugin_config['GNA_SCALE_FACTOR'] = str(args.scale_factor)
else:
utterances = read_utterance_file(args.input.split(',')[0])
key = sorted(utterances)[0]
scale_factor = get_scale_factor(utterances[key])
log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.')
plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor)
if args.export_embedded_gna_model:
plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration
if args.performance_counter:
plugin_config['PERF_COUNT'] = 'YES'
device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]
log.info('Loading the model to the plugin')
@@ -220,6 +229,7 @@ def main():
log.info('Starting inference in synchronous mode')
results = {blob_name: {} for blob_name in output_blobs}
infer_times = []
perf_counters = []
for key in sorted(input_data):
start_infer_time = default_timer()
@@ -235,6 +245,7 @@ def main():
results[blob_name][key] = result[blob_name]
infer_times.append(default_timer() - start_infer_time)
perf_counters.append(exec_net.requests[0].get_perf_counts())
# ---------------------------Step 8. Process output--------------------------------------------------------------------
for blob_name in output_blobs:
@@ -247,6 +258,26 @@ def main():
if args.reference:
compare_with_reference(results[blob_name][key], references[blob_name][key])
if args.performance_counter:
if 'GNA' in args.device:
pc = perf_counters[i]
total_cycles = int(pc['1.1 Total scoring time in HW']['real_time'])
stall_cycles = int(pc['1.2 Stall scoring time in HW']['real_time'])
active_cycles = total_cycles - stall_cycles
frequency = 10**6
if args.arch == 'CORE':
frequency *= GNA_CORE_FREQUENCY
else:
frequency *= GNA_ATOM_FREQUENCY
total_inference_time = total_cycles / frequency
active_time = active_cycles / frequency
stall_time = stall_cycles / frequency
log.info('')
log.info('Performance Statistics of GNA Hardware')
log.info(f' Total Inference Time: {(total_inference_time * 1000):.4f} ms')
log.info(f' Active Time: {(active_time * 1000):.4f} ms')
log.info(f' Stall Time: {(stall_time * 1000):.4f} ms')
log.info('')
log.info(f'Total sample time: {sum(infer_times) * 1000:.2f}ms')