[IE Python Speech Sample] Add --scale_factor and --performance_counter options (#6663)
* Add perf counters and scale factor args * Add defined choices for arch type for -a/--arch option * Change print to logger; frequencies are now global consts * Change log info formatting * Fix style issues * doc: Update inference-engine/ie_bridges/python/sample/speech_sample/README.md * doc: Update inference-engine/ie_bridges/python/sample/speech_sample/README.md * doc: Update inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py * doc: Update inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py Co-authored-by: Koyanagi, Ken <ken.koyanagi@intel.com> Co-authored-by: Kate Generalova <kate.generalova@intel.com>
This commit is contained in:
@@ -80,7 +80,8 @@ Usage message:
|
||||
usage: speech_sample.py [-h] (-m MODEL | -rg IMPORT_GNA_MODEL) -i INPUT
|
||||
[-o OUTPUT] [-r REFERENCE] [-d DEVICE]
|
||||
[-bs BATCH_SIZE] [-qb QUANTIZATION_BITS]
|
||||
[-wg EXPORT_GNA_MODEL] [-iname INPUT_LAYERS]
|
||||
[-sf SCALE_FACTOR] [-wg EXPORT_GNA_MODEL] [-pc]
|
||||
[-a {CORE,ATOM}] [-iname INPUT_LAYERS]
|
||||
[-oname OUTPUT_LAYERS]
|
||||
|
||||
optional arguments:
|
||||
@@ -94,9 +95,10 @@ optional arguments:
|
||||
Options:
|
||||
-h, --help Show this help message and exit.
|
||||
-i INPUT, --input INPUT
|
||||
Required. Path to an input file (.ark or .npz).
|
||||
Required. Path to an input file (.ark or .npz).
|
||||
-o OUTPUT, --output OUTPUT
|
||||
Optional. Output file name to save inference results (.ark or .npz).
|
||||
Optional. Output file name to save inference results
|
||||
(.ark or .npz).
|
||||
-r REFERENCE, --reference REFERENCE
|
||||
Optional. Read reference score file and compare
|
||||
scores.
|
||||
@@ -113,9 +115,18 @@ Options:
|
||||
-qb QUANTIZATION_BITS, --quantization_bits QUANTIZATION_BITS
|
||||
Optional. Weight bits for quantization: 8 or 16
|
||||
(default 16).
|
||||
-sf SCALE_FACTOR, --scale_factor SCALE_FACTOR
|
||||
Optional. The user-specified input scale factor for
|
||||
quantization.
|
||||
-wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL
|
||||
Optional. Write GNA model to file using path/filename
|
||||
provided.
|
||||
-pc, --performance_counter
|
||||
Optional. Enables performance report (specify -a to
|
||||
ensure arch accurate results).
|
||||
-a {CORE,ATOM}, --arch {CORE,ATOM}
|
||||
Optional. Specify architecture. CORE, ATOM with the
|
||||
combination of -pc.
|
||||
-iname INPUT_LAYERS, --input_layers INPUT_LAYERS
|
||||
Optional. Layer names for input blobs. The names are
|
||||
separated with ",". Allows to change the order of
|
||||
|
||||
@@ -28,10 +28,16 @@ def parse_args() -> argparse.Namespace:
|
||||
args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).')
|
||||
args.add_argument('-qb', '--quantization_bits', default=16, type=int,
|
||||
help='Optional. Weight bits for quantization: 8 or 16 (default 16).')
|
||||
args.add_argument('-sf', '--scale_factor', type=float,
|
||||
help='Optional. The user-specified input scale factor for quantization.')
|
||||
args.add_argument('-wg', '--export_gna_model', type=str,
|
||||
help='Optional. Write GNA model to file using path/filename provided.')
|
||||
args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS)
|
||||
args.add_argument('-we_gen', '--embedded_gna_configuration', default='GNA1', type=str, help=argparse.SUPPRESS)
|
||||
args.add_argument('-pc', '--performance_counter', action='store_true',
|
||||
help='Optional. Enables performance report (specify -a to ensure arch accurate results).')
|
||||
args.add_argument('-a', '--arch', default='CORE', type=str.upper, choices=['CORE', 'ATOM'],
|
||||
help='Optional. Specify architecture. CORE, ATOM with the combination of -pc.')
|
||||
args.add_argument('-iname', '--input_layers', type=str,
|
||||
help='Optional. Layer names for input blobs. The names are separated with ",". '
|
||||
'Allows to change the order of input layers for -i flag. Example: Input1,Input2')
|
||||
|
||||
@@ -14,6 +14,10 @@ from arg_parser import parse_args
|
||||
from file_options import read_utterance_file, write_utterance_file
|
||||
from openvino.inference_engine import ExecutableNetwork, IECore, IENetwork
|
||||
|
||||
# Operating Frequency for GNA HW devices for Core and Atom architecture
|
||||
GNA_CORE_FREQUENCY = 400
|
||||
GNA_ATOM_FREQUENCY = 200
|
||||
|
||||
|
||||
def get_scale_factor(matrix: np.ndarray) -> float:
|
||||
"""Get scale factor for quantization using utterance matrix"""
|
||||
@@ -143,21 +147,26 @@ def main():
|
||||
plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
|
||||
plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'
|
||||
|
||||
# Get a GNA scale factor
|
||||
# Set a GNA scale factor
|
||||
if args.import_gna_model:
|
||||
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
|
||||
elif args.scale_factor:
|
||||
log.info(f'Using scale factor of {args.scale_factor:.7f} specified by user.')
|
||||
plugin_config['GNA_SCALE_FACTOR'] = str(args.scale_factor)
|
||||
else:
|
||||
utterances = read_utterance_file(args.input.split(',')[0])
|
||||
key = sorted(utterances)[0]
|
||||
scale_factor = get_scale_factor(utterances[key])
|
||||
log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.')
|
||||
|
||||
plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor)
|
||||
|
||||
if args.export_embedded_gna_model:
|
||||
plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
|
||||
plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration
|
||||
|
||||
if args.performance_counter:
|
||||
plugin_config['PERF_COUNT'] = 'YES'
|
||||
|
||||
device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]
|
||||
|
||||
log.info('Loading the model to the plugin')
|
||||
@@ -220,6 +229,7 @@ def main():
|
||||
log.info('Starting inference in synchronous mode')
|
||||
results = {blob_name: {} for blob_name in output_blobs}
|
||||
infer_times = []
|
||||
perf_counters = []
|
||||
|
||||
for key in sorted(input_data):
|
||||
start_infer_time = default_timer()
|
||||
@@ -235,6 +245,7 @@ def main():
|
||||
results[blob_name][key] = result[blob_name]
|
||||
|
||||
infer_times.append(default_timer() - start_infer_time)
|
||||
perf_counters.append(exec_net.requests[0].get_perf_counts())
|
||||
|
||||
# ---------------------------Step 8. Process output--------------------------------------------------------------------
|
||||
for blob_name in output_blobs:
|
||||
@@ -247,6 +258,26 @@ def main():
|
||||
if args.reference:
|
||||
compare_with_reference(results[blob_name][key], references[blob_name][key])
|
||||
|
||||
if args.performance_counter:
|
||||
if 'GNA' in args.device:
|
||||
pc = perf_counters[i]
|
||||
total_cycles = int(pc['1.1 Total scoring time in HW']['real_time'])
|
||||
stall_cycles = int(pc['1.2 Stall scoring time in HW']['real_time'])
|
||||
active_cycles = total_cycles - stall_cycles
|
||||
frequency = 10**6
|
||||
if args.arch == 'CORE':
|
||||
frequency *= GNA_CORE_FREQUENCY
|
||||
else:
|
||||
frequency *= GNA_ATOM_FREQUENCY
|
||||
total_inference_time = total_cycles / frequency
|
||||
active_time = active_cycles / frequency
|
||||
stall_time = stall_cycles / frequency
|
||||
log.info('')
|
||||
log.info('Performance Statistics of GNA Hardware')
|
||||
log.info(f' Total Inference Time: {(total_inference_time * 1000):.4f} ms')
|
||||
log.info(f' Active Time: {(active_time * 1000):.4f} ms')
|
||||
log.info(f' Stall Time: {(stall_time * 1000):.4f} ms')
|
||||
|
||||
log.info('')
|
||||
|
||||
log.info(f'Total sample time: {sum(infer_times) * 1000:.2f}ms')
|
||||
|
||||
Reference in New Issue
Block a user