[IE Python Speech Sample] Add --scale_factor and --performance_counter options (#6663)
* Add perf counters and scale factor args * Add defined choices for arch type for -a/--arch option * Change print to logger; frequencies are now global consts * Change log info formatting * Fix style issues * doc: Update inference-engine/ie_bridges/python/sample/speech_sample/README.md * doc: Update inference-engine/ie_bridges/python/sample/speech_sample/README.md * doc: Update inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py * doc: Update inference-engine/ie_bridges/python/sample/speech_sample/arg_parser.py Co-authored-by: Koyanagi, Ken <ken.koyanagi@intel.com> Co-authored-by: Kate Generalova <kate.generalova@intel.com>
This commit is contained in:
@@ -80,7 +80,8 @@ Usage message:
|
||||
usage: speech_sample.py [-h] (-m MODEL | -rg IMPORT_GNA_MODEL) -i INPUT
|
||||
[-o OUTPUT] [-r REFERENCE] [-d DEVICE]
|
||||
[-bs BATCH_SIZE] [-qb QUANTIZATION_BITS]
|
||||
[-wg EXPORT_GNA_MODEL] [-iname INPUT_LAYERS]
|
||||
[-sf SCALE_FACTOR] [-wg EXPORT_GNA_MODEL] [-pc]
|
||||
[-a {CORE,ATOM}] [-iname INPUT_LAYERS]
|
||||
[-oname OUTPUT_LAYERS]
|
||||
|
||||
optional arguments:
|
||||
@@ -94,9 +95,10 @@ optional arguments:
|
||||
Options:
|
||||
-h, --help Show this help message and exit.
|
||||
-i INPUT, --input INPUT
|
||||
Required. Path to an input file (.ark or .npz).
|
||||
Required. Path to an input file (.ark or .npz).
|
||||
-o OUTPUT, --output OUTPUT
|
||||
Optional. Output file name to save inference results (.ark or .npz).
|
||||
Optional. Output file name to save inference results
|
||||
(.ark or .npz).
|
||||
-r REFERENCE, --reference REFERENCE
|
||||
Optional. Read reference score file and compare
|
||||
scores.
|
||||
@@ -113,9 +115,18 @@ Options:
|
||||
-qb QUANTIZATION_BITS, --quantization_bits QUANTIZATION_BITS
|
||||
Optional. Weight bits for quantization: 8 or 16
|
||||
(default 16).
|
||||
-sf SCALE_FACTOR, --scale_factor SCALE_FACTOR
|
||||
Optional. The user-specified input scale factor for
|
||||
quantization.
|
||||
-wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL
|
||||
Optional. Write GNA model to file using path/filename
|
||||
provided.
|
||||
-pc, --performance_counter
|
||||
Optional. Enables performance report (specify -a to
|
||||
ensure arch accurate results).
|
||||
-a {CORE,ATOM}, --arch {CORE,ATOM}
|
||||
Optional. Specify architecture. CORE, ATOM with the
|
||||
combination of -pc.
|
||||
-iname INPUT_LAYERS, --input_layers INPUT_LAYERS
|
||||
Optional. Layer names for input blobs. The names are
|
||||
separated with ",". Allows to change the order of
|
||||
|
||||
@@ -28,10 +28,16 @@ def parse_args() -> argparse.Namespace:
|
||||
args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).')
|
||||
args.add_argument('-qb', '--quantization_bits', default=16, type=int,
|
||||
help='Optional. Weight bits for quantization: 8 or 16 (default 16).')
|
||||
args.add_argument('-sf', '--scale_factor', type=float,
|
||||
help='Optional. The user-specified input scale factor for quantization.')
|
||||
args.add_argument('-wg', '--export_gna_model', type=str,
|
||||
help='Optional. Write GNA model to file using path/filename provided.')
|
||||
args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS)
|
||||
args.add_argument('-we_gen', '--embedded_gna_configuration', default='GNA1', type=str, help=argparse.SUPPRESS)
|
||||
args.add_argument('-pc', '--performance_counter', action='store_true',
|
||||
help='Optional. Enables performance report (specify -a to ensure arch accurate results).')
|
||||
args.add_argument('-a', '--arch', default='CORE', type=str.upper, choices=['CORE', 'ATOM'],
|
||||
help='Optional. Specify architecture. CORE, ATOM with the combination of -pc.')
|
||||
args.add_argument('-iname', '--input_layers', type=str,
|
||||
help='Optional. Layer names for input blobs. The names are separated with ",". '
|
||||
'Allows to change the order of input layers for -i flag. Example: Input1,Input2')
|
||||
|
||||
@@ -14,6 +14,10 @@ from arg_parser import parse_args
|
||||
from file_options import read_utterance_file, write_utterance_file
|
||||
from openvino.inference_engine import ExecutableNetwork, IECore, IENetwork
|
||||
|
||||
# Operating Frequency for GNA HW devices for Core and Atom architecture
|
||||
GNA_CORE_FREQUENCY = 400
|
||||
GNA_ATOM_FREQUENCY = 200
|
||||
|
||||
|
||||
def get_scale_factor(matrix: np.ndarray) -> float:
|
||||
"""Get scale factor for quantization using utterance matrix"""
|
||||
@@ -143,21 +147,26 @@ def main():
|
||||
plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
|
||||
plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'
|
||||
|
||||
# Get a GNA scale factor
|
||||
# Set a GNA scale factor
|
||||
if args.import_gna_model:
|
||||
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
|
||||
elif args.scale_factor:
|
||||
log.info(f'Using scale factor of {args.scale_factor:.7f} specified by user.')
|
||||
plugin_config['GNA_SCALE_FACTOR'] = str(args.scale_factor)
|
||||
else:
|
||||
utterances = read_utterance_file(args.input.split(',')[0])
|
||||
key = sorted(utterances)[0]
|
||||
scale_factor = get_scale_factor(utterances[key])
|
||||
log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.')
|
||||
|
||||
plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor)
|
||||
|
||||
if args.export_embedded_gna_model:
|
||||
plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
|
||||
plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration
|
||||
|
||||
if args.performance_counter:
|
||||
plugin_config['PERF_COUNT'] = 'YES'
|
||||
|
||||
device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]
|
||||
|
||||
log.info('Loading the model to the plugin')
|
||||
@@ -220,6 +229,7 @@ def main():
|
||||
log.info('Starting inference in synchronous mode')
|
||||
results = {blob_name: {} for blob_name in output_blobs}
|
||||
infer_times = []
|
||||
perf_counters = []
|
||||
|
||||
for key in sorted(input_data):
|
||||
start_infer_time = default_timer()
|
||||
@@ -235,6 +245,7 @@ def main():
|
||||
results[blob_name][key] = result[blob_name]
|
||||
|
||||
infer_times.append(default_timer() - start_infer_time)
|
||||
perf_counters.append(exec_net.requests[0].get_perf_counts())
|
||||
|
||||
# ---------------------------Step 8. Process output--------------------------------------------------------------------
|
||||
for blob_name in output_blobs:
|
||||
@@ -247,6 +258,26 @@ def main():
|
||||
if args.reference:
|
||||
compare_with_reference(results[blob_name][key], references[blob_name][key])
|
||||
|
||||
if args.performance_counter:
|
||||
if 'GNA' in args.device:
|
||||
pc = perf_counters[i]
|
||||
total_cycles = int(pc['1.1 Total scoring time in HW']['real_time'])
|
||||
stall_cycles = int(pc['1.2 Stall scoring time in HW']['real_time'])
|
||||
active_cycles = total_cycles - stall_cycles
|
||||
frequency = 10**6
|
||||
if args.arch == 'CORE':
|
||||
frequency *= GNA_CORE_FREQUENCY
|
||||
else:
|
||||
frequency *= GNA_ATOM_FREQUENCY
|
||||
total_inference_time = total_cycles / frequency
|
||||
active_time = active_cycles / frequency
|
||||
stall_time = stall_cycles / frequency
|
||||
log.info('')
|
||||
log.info('Performance Statistics of GNA Hardware')
|
||||
log.info(f' Total Inference Time: {(total_inference_time * 1000):.4f} ms')
|
||||
log.info(f' Active Time: {(active_time * 1000):.4f} ms')
|
||||
log.info(f' Stall Time: {(stall_time * 1000):.4f} ms')
|
||||
|
||||
log.info('')
|
||||
|
||||
log.info(f'Total sample time: {sum(infer_times) * 1000:.2f}ms')
|
||||
|
||||
Reference in New Issue
Block a user