[IE Python Speech Sample] Add --scale_factor and --performance_counter options (#6663)

* Adds perf counters and scale factor args

* Adding defined choices for arch type for -a/--arch option

* changing print to logger, frequencies are now global consts

* change to log info formatting

* Fix style issues

* doc: Update inference-engine/ie_bridges/python/sample/speech_sample/README.md

* doc: Update inference-engine/ie_bridges/python/sample/speech_sample/README.md

* doc: Update inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py

* doc: Update inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py

Co-authored-by: Koyanagi, Ken <ken.koyanagi@intel.com>
Co-authored-by: Kate Generalova <kate.generalova@intel.com>
This commit is contained in:
Dmitry Pigasin
2021-09-03 23:23:00 +03:00
committed by GitHub
parent 005e7da325
commit bb84d11313
3 changed files with 53 additions and 5 deletions

View File

@@ -80,7 +80,8 @@ Usage message:
usage: speech_sample.py [-h] (-m MODEL | -rg IMPORT_GNA_MODEL) -i INPUT usage: speech_sample.py [-h] (-m MODEL | -rg IMPORT_GNA_MODEL) -i INPUT
[-o OUTPUT] [-r REFERENCE] [-d DEVICE] [-o OUTPUT] [-r REFERENCE] [-d DEVICE]
[-bs BATCH_SIZE] [-qb QUANTIZATION_BITS] [-bs BATCH_SIZE] [-qb QUANTIZATION_BITS]
[-wg EXPORT_GNA_MODEL] [-iname INPUT_LAYERS] [-sf SCALE_FACTOR] [-wg EXPORT_GNA_MODEL] [-pc]
[-a {CORE,ATOM}] [-iname INPUT_LAYERS]
[-oname OUTPUT_LAYERS] [-oname OUTPUT_LAYERS]
optional arguments: optional arguments:
@@ -94,9 +95,10 @@ optional arguments:
Options: Options:
-h, --help Show this help message and exit. -h, --help Show this help message and exit.
-i INPUT, --input INPUT -i INPUT, --input INPUT
Required. Path to an input file (.ark or .npz). Required. Path to an input file (.ark or .npz).
-o OUTPUT, --output OUTPUT -o OUTPUT, --output OUTPUT
Optional. Output file name to save inference results (.ark or .npz). Optional. Output file name to save inference results
(.ark or .npz).
-r REFERENCE, --reference REFERENCE -r REFERENCE, --reference REFERENCE
Optional. Read reference score file and compare Optional. Read reference score file and compare
scores. scores.
@@ -113,9 +115,18 @@ Options:
-qb QUANTIZATION_BITS, --quantization_bits QUANTIZATION_BITS -qb QUANTIZATION_BITS, --quantization_bits QUANTIZATION_BITS
Optional. Weight bits for quantization: 8 or 16 Optional. Weight bits for quantization: 8 or 16
(default 16). (default 16).
-sf SCALE_FACTOR, --scale_factor SCALE_FACTOR
Optional. The user-specified input scale factor for
quantization.
-wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL -wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL
Optional. Write GNA model to file using path/filename Optional. Write GNA model to file using path/filename
provided. provided.
-pc, --performance_counter
Optional. Enables performance report (specify -a to
ensure arch accurate results).
-a {CORE,ATOM}, --arch {CORE,ATOM}
Optional. Specify architecture. CORE, ATOM with the
combination of -pc.
-iname INPUT_LAYERS, --input_layers INPUT_LAYERS -iname INPUT_LAYERS, --input_layers INPUT_LAYERS
Optional. Layer names for input blobs. The names are Optional. Layer names for input blobs. The names are
separated with ",". Allows to change the order of separated with ",". Allows to change the order of

View File

@@ -28,10 +28,16 @@ def parse_args() -> argparse.Namespace:
args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).') args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).')
args.add_argument('-qb', '--quantization_bits', default=16, type=int, args.add_argument('-qb', '--quantization_bits', default=16, type=int,
help='Optional. Weight bits for quantization: 8 or 16 (default 16).') help='Optional. Weight bits for quantization: 8 or 16 (default 16).')
args.add_argument('-sf', '--scale_factor', type=float,
help='Optional. The user-specified input scale factor for quantization.')
args.add_argument('-wg', '--export_gna_model', type=str, args.add_argument('-wg', '--export_gna_model', type=str,
help='Optional. Write GNA model to file using path/filename provided.') help='Optional. Write GNA model to file using path/filename provided.')
args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS) args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS)
args.add_argument('-we_gen', '--embedded_gna_configuration', default='GNA1', type=str, help=argparse.SUPPRESS) args.add_argument('-we_gen', '--embedded_gna_configuration', default='GNA1', type=str, help=argparse.SUPPRESS)
args.add_argument('-pc', '--performance_counter', action='store_true',
help='Optional. Enables performance report (specify -a to ensure arch accurate results).')
args.add_argument('-a', '--arch', default='CORE', type=str.upper, choices=['CORE', 'ATOM'],
help='Optional. Specify architecture. CORE, ATOM with the combination of -pc.')
args.add_argument('-iname', '--input_layers', type=str, args.add_argument('-iname', '--input_layers', type=str,
help='Optional. Layer names for input blobs. The names are separated with ",". ' help='Optional. Layer names for input blobs. The names are separated with ",". '
'Allows to change the order of input layers for -i flag. Example: Input1,Input2') 'Allows to change the order of input layers for -i flag. Example: Input1,Input2')

View File

@@ -14,6 +14,10 @@ from arg_parser import parse_args
from file_options import read_utterance_file, write_utterance_file from file_options import read_utterance_file, write_utterance_file
from openvino.inference_engine import ExecutableNetwork, IECore, IENetwork from openvino.inference_engine import ExecutableNetwork, IECore, IENetwork
# Operating Frequency for GNA HW devices for Core and Atom architecture
GNA_CORE_FREQUENCY = 400
GNA_ATOM_FREQUENCY = 200
def get_scale_factor(matrix: np.ndarray) -> float: def get_scale_factor(matrix: np.ndarray) -> float:
"""Get scale factor for quantization using utterance matrix""" """Get scale factor for quantization using utterance matrix"""
@@ -143,21 +147,26 @@ def main():
plugin_config['GNA_DEVICE_MODE'] = gna_device_mode plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}' plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'
# Get a GNA scale factor # Set a GNA scale factor
if args.import_gna_model: if args.import_gna_model:
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}') log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
elif args.scale_factor:
log.info(f'Using scale factor of {args.scale_factor:.7f} specified by user.')
plugin_config['GNA_SCALE_FACTOR'] = str(args.scale_factor)
else: else:
utterances = read_utterance_file(args.input.split(',')[0]) utterances = read_utterance_file(args.input.split(',')[0])
key = sorted(utterances)[0] key = sorted(utterances)[0]
scale_factor = get_scale_factor(utterances[key]) scale_factor = get_scale_factor(utterances[key])
log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.') log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.')
plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor) plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor)
if args.export_embedded_gna_model: if args.export_embedded_gna_model:
plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration
if args.performance_counter:
plugin_config['PERF_COUNT'] = 'YES'
device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0] device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]
log.info('Loading the model to the plugin') log.info('Loading the model to the plugin')
@@ -220,6 +229,7 @@ def main():
log.info('Starting inference in synchronous mode') log.info('Starting inference in synchronous mode')
results = {blob_name: {} for blob_name in output_blobs} results = {blob_name: {} for blob_name in output_blobs}
infer_times = [] infer_times = []
perf_counters = []
for key in sorted(input_data): for key in sorted(input_data):
start_infer_time = default_timer() start_infer_time = default_timer()
@@ -235,6 +245,7 @@ def main():
results[blob_name][key] = result[blob_name] results[blob_name][key] = result[blob_name]
infer_times.append(default_timer() - start_infer_time) infer_times.append(default_timer() - start_infer_time)
perf_counters.append(exec_net.requests[0].get_perf_counts())
# ---------------------------Step 8. Process output-------------------------------------------------------------------- # ---------------------------Step 8. Process output--------------------------------------------------------------------
for blob_name in output_blobs: for blob_name in output_blobs:
@@ -247,6 +258,26 @@ def main():
if args.reference: if args.reference:
compare_with_reference(results[blob_name][key], references[blob_name][key]) compare_with_reference(results[blob_name][key], references[blob_name][key])
if args.performance_counter:
if 'GNA' in args.device:
pc = perf_counters[i]
total_cycles = int(pc['1.1 Total scoring time in HW']['real_time'])
stall_cycles = int(pc['1.2 Stall scoring time in HW']['real_time'])
active_cycles = total_cycles - stall_cycles
frequency = 10**6
if args.arch == 'CORE':
frequency *= GNA_CORE_FREQUENCY
else:
frequency *= GNA_ATOM_FREQUENCY
total_inference_time = total_cycles / frequency
active_time = active_cycles / frequency
stall_time = stall_cycles / frequency
log.info('')
log.info('Performance Statistics of GNA Hardware')
log.info(f' Total Inference Time: {(total_inference_time * 1000):.4f} ms')
log.info(f' Active Time: {(active_time * 1000):.4f} ms')
log.info(f' Stall Time: {(stall_time * 1000):.4f} ms')
log.info('') log.info('')
log.info(f'Total sample time: {sum(infer_times) * 1000:.2f}ms') log.info(f'Total sample time: {sum(infer_times) * 1000:.2f}ms')