[IE Python Speech Sample] Add --scale_factor and --performance_counter options (#6663)
* Adds perf counters, and scale factor args * Adding defined choices for arch type for -a/--arch option * changing print to logger, frequencies are now global consts * change to log info formatting * Fix style issues * doc: Update inference-engine/ie_bridges/python/sample/speech_sample/README.md * doc: Update inference-engine/ie_bridges/python/sample/speech_sample/README.md * doc: Update inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py * doc: Update inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py Co-authored-by: Koyanagi, Ken <ken.koyanagi@intel.com> Co-authored-by: Kate Generalova <kate.generalova@intel.com>
This commit is contained in:
@@ -80,7 +80,8 @@ Usage message:
|
|||||||
usage: speech_sample.py [-h] (-m MODEL | -rg IMPORT_GNA_MODEL) -i INPUT
|
usage: speech_sample.py [-h] (-m MODEL | -rg IMPORT_GNA_MODEL) -i INPUT
|
||||||
[-o OUTPUT] [-r REFERENCE] [-d DEVICE]
|
[-o OUTPUT] [-r REFERENCE] [-d DEVICE]
|
||||||
[-bs BATCH_SIZE] [-qb QUANTIZATION_BITS]
|
[-bs BATCH_SIZE] [-qb QUANTIZATION_BITS]
|
||||||
[-wg EXPORT_GNA_MODEL] [-iname INPUT_LAYERS]
|
[-sf SCALE_FACTOR] [-wg EXPORT_GNA_MODEL] [-pc]
|
||||||
|
[-a {CORE,ATOM}] [-iname INPUT_LAYERS]
|
||||||
[-oname OUTPUT_LAYERS]
|
[-oname OUTPUT_LAYERS]
|
||||||
|
|
||||||
optional arguments:
|
optional arguments:
|
||||||
@@ -94,9 +95,10 @@ optional arguments:
|
|||||||
Options:
|
Options:
|
||||||
-h, --help Show this help message and exit.
|
-h, --help Show this help message and exit.
|
||||||
-i INPUT, --input INPUT
|
-i INPUT, --input INPUT
|
||||||
Required. Path to an input file (.ark or .npz).
|
Required. Path to an input file (.ark or .npz).
|
||||||
-o OUTPUT, --output OUTPUT
|
-o OUTPUT, --output OUTPUT
|
||||||
Optional. Output file name to save inference results (.ark or .npz).
|
Optional. Output file name to save inference results
|
||||||
|
(.ark or .npz).
|
||||||
-r REFERENCE, --reference REFERENCE
|
-r REFERENCE, --reference REFERENCE
|
||||||
Optional. Read reference score file and compare
|
Optional. Read reference score file and compare
|
||||||
scores.
|
scores.
|
||||||
@@ -113,9 +115,18 @@ Options:
|
|||||||
-qb QUANTIZATION_BITS, --quantization_bits QUANTIZATION_BITS
|
-qb QUANTIZATION_BITS, --quantization_bits QUANTIZATION_BITS
|
||||||
Optional. Weight bits for quantization: 8 or 16
|
Optional. Weight bits for quantization: 8 or 16
|
||||||
(default 16).
|
(default 16).
|
||||||
|
-sf SCALE_FACTOR, --scale_factor SCALE_FACTOR
|
||||||
|
Optional. The user-specified input scale factor for
|
||||||
|
quantization.
|
||||||
-wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL
|
-wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL
|
||||||
Optional. Write GNA model to file using path/filename
|
Optional. Write GNA model to file using path/filename
|
||||||
provided.
|
provided.
|
||||||
|
-pc, --performance_counter
|
||||||
|
Optional. Enables performance report (specify -a to
|
||||||
|
ensure arch accurate results).
|
||||||
|
-a {CORE,ATOM}, --arch {CORE,ATOM}
|
||||||
|
Optional. Specify architecture. CORE, ATOM with the
|
||||||
|
combination of -pc.
|
||||||
-iname INPUT_LAYERS, --input_layers INPUT_LAYERS
|
-iname INPUT_LAYERS, --input_layers INPUT_LAYERS
|
||||||
Optional. Layer names for input blobs. The names are
|
Optional. Layer names for input blobs. The names are
|
||||||
separated with ",". Allows to change the order of
|
separated with ",". Allows to change the order of
|
||||||
|
|||||||
@@ -28,10 +28,16 @@ def parse_args() -> argparse.Namespace:
|
|||||||
args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).')
|
args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).')
|
||||||
args.add_argument('-qb', '--quantization_bits', default=16, type=int,
|
args.add_argument('-qb', '--quantization_bits', default=16, type=int,
|
||||||
help='Optional. Weight bits for quantization: 8 or 16 (default 16).')
|
help='Optional. Weight bits for quantization: 8 or 16 (default 16).')
|
||||||
|
args.add_argument('-sf', '--scale_factor', type=float,
|
||||||
|
help='Optional. The user-specified input scale factor for quantization.')
|
||||||
args.add_argument('-wg', '--export_gna_model', type=str,
|
args.add_argument('-wg', '--export_gna_model', type=str,
|
||||||
help='Optional. Write GNA model to file using path/filename provided.')
|
help='Optional. Write GNA model to file using path/filename provided.')
|
||||||
args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS)
|
args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS)
|
||||||
args.add_argument('-we_gen', '--embedded_gna_configuration', default='GNA1', type=str, help=argparse.SUPPRESS)
|
args.add_argument('-we_gen', '--embedded_gna_configuration', default='GNA1', type=str, help=argparse.SUPPRESS)
|
||||||
|
args.add_argument('-pc', '--performance_counter', action='store_true',
|
||||||
|
help='Optional. Enables performance report (specify -a to ensure arch accurate results).')
|
||||||
|
args.add_argument('-a', '--arch', default='CORE', type=str.upper, choices=['CORE', 'ATOM'],
|
||||||
|
help='Optional. Specify architecture. CORE, ATOM with the combination of -pc.')
|
||||||
args.add_argument('-iname', '--input_layers', type=str,
|
args.add_argument('-iname', '--input_layers', type=str,
|
||||||
help='Optional. Layer names for input blobs. The names are separated with ",". '
|
help='Optional. Layer names for input blobs. The names are separated with ",". '
|
||||||
'Allows to change the order of input layers for -i flag. Example: Input1,Input2')
|
'Allows to change the order of input layers for -i flag. Example: Input1,Input2')
|
||||||
|
|||||||
@@ -14,6 +14,10 @@ from arg_parser import parse_args
|
|||||||
from file_options import read_utterance_file, write_utterance_file
|
from file_options import read_utterance_file, write_utterance_file
|
||||||
from openvino.inference_engine import ExecutableNetwork, IECore, IENetwork
|
from openvino.inference_engine import ExecutableNetwork, IECore, IENetwork
|
||||||
|
|
||||||
|
# Operating frequency, in MHz, of GNA HW devices for the Core and Atom architectures
|
||||||
|
GNA_CORE_FREQUENCY = 400
|
||||||
|
GNA_ATOM_FREQUENCY = 200
|
||||||
|
|
||||||
|
|
||||||
def get_scale_factor(matrix: np.ndarray) -> float:
|
def get_scale_factor(matrix: np.ndarray) -> float:
|
||||||
"""Get scale factor for quantization using utterance matrix"""
|
"""Get scale factor for quantization using utterance matrix"""
|
||||||
@@ -143,21 +147,26 @@ def main():
|
|||||||
plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
|
plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
|
||||||
plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'
|
plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'
|
||||||
|
|
||||||
# Get a GNA scale factor
|
# Set a GNA scale factor
|
||||||
if args.import_gna_model:
|
if args.import_gna_model:
|
||||||
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
|
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
|
||||||
|
elif args.scale_factor:
|
||||||
|
log.info(f'Using scale factor of {args.scale_factor:.7f} specified by user.')
|
||||||
|
plugin_config['GNA_SCALE_FACTOR'] = str(args.scale_factor)
|
||||||
else:
|
else:
|
||||||
utterances = read_utterance_file(args.input.split(',')[0])
|
utterances = read_utterance_file(args.input.split(',')[0])
|
||||||
key = sorted(utterances)[0]
|
key = sorted(utterances)[0]
|
||||||
scale_factor = get_scale_factor(utterances[key])
|
scale_factor = get_scale_factor(utterances[key])
|
||||||
log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.')
|
log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.')
|
||||||
|
|
||||||
plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor)
|
plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor)
|
||||||
|
|
||||||
if args.export_embedded_gna_model:
|
if args.export_embedded_gna_model:
|
||||||
plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
|
plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
|
||||||
plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration
|
plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration
|
||||||
|
|
||||||
|
if args.performance_counter:
|
||||||
|
plugin_config['PERF_COUNT'] = 'YES'
|
||||||
|
|
||||||
device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]
|
device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]
|
||||||
|
|
||||||
log.info('Loading the model to the plugin')
|
log.info('Loading the model to the plugin')
|
||||||
@@ -220,6 +229,7 @@ def main():
|
|||||||
log.info('Starting inference in synchronous mode')
|
log.info('Starting inference in synchronous mode')
|
||||||
results = {blob_name: {} for blob_name in output_blobs}
|
results = {blob_name: {} for blob_name in output_blobs}
|
||||||
infer_times = []
|
infer_times = []
|
||||||
|
perf_counters = []
|
||||||
|
|
||||||
for key in sorted(input_data):
|
for key in sorted(input_data):
|
||||||
start_infer_time = default_timer()
|
start_infer_time = default_timer()
|
||||||
@@ -235,6 +245,7 @@ def main():
|
|||||||
results[blob_name][key] = result[blob_name]
|
results[blob_name][key] = result[blob_name]
|
||||||
|
|
||||||
infer_times.append(default_timer() - start_infer_time)
|
infer_times.append(default_timer() - start_infer_time)
|
||||||
|
perf_counters.append(exec_net.requests[0].get_perf_counts())
|
||||||
|
|
||||||
# ---------------------------Step 8. Process output--------------------------------------------------------------------
|
# ---------------------------Step 8. Process output--------------------------------------------------------------------
|
||||||
for blob_name in output_blobs:
|
for blob_name in output_blobs:
|
||||||
@@ -247,6 +258,26 @@ def main():
|
|||||||
if args.reference:
|
if args.reference:
|
||||||
compare_with_reference(results[blob_name][key], references[blob_name][key])
|
compare_with_reference(results[blob_name][key], references[blob_name][key])
|
||||||
|
|
||||||
|
if args.performance_counter:
|
||||||
|
if 'GNA' in args.device:
|
||||||
|
pc = perf_counters[i]
|
||||||
|
total_cycles = int(pc['1.1 Total scoring time in HW']['real_time'])
|
||||||
|
stall_cycles = int(pc['1.2 Stall scoring time in HW']['real_time'])
|
||||||
|
active_cycles = total_cycles - stall_cycles
|
||||||
|
frequency = 10**6
|
||||||
|
if args.arch == 'CORE':
|
||||||
|
frequency *= GNA_CORE_FREQUENCY
|
||||||
|
else:
|
||||||
|
frequency *= GNA_ATOM_FREQUENCY
|
||||||
|
total_inference_time = total_cycles / frequency
|
||||||
|
active_time = active_cycles / frequency
|
||||||
|
stall_time = stall_cycles / frequency
|
||||||
|
log.info('')
|
||||||
|
log.info('Performance Statistics of GNA Hardware')
|
||||||
|
log.info(f' Total Inference Time: {(total_inference_time * 1000):.4f} ms')
|
||||||
|
log.info(f' Active Time: {(active_time * 1000):.4f} ms')
|
||||||
|
log.info(f' Stall Time: {(stall_time * 1000):.4f} ms')
|
||||||
|
|
||||||
log.info('')
|
log.info('')
|
||||||
|
|
||||||
log.info(f'Total sample time: {sum(infer_times) * 1000:.2f}ms')
|
log.info(f'Total sample time: {sum(infer_times) * 1000:.2f}ms')
|
||||||
|
|||||||
Reference in New Issue
Block a user