[IE Python Speech Sample] Enable --scale_factor for multiple input files (#7482)

* Enable --scale_factor for multiple input files

* Small refactoring of getting a first utterance

* Fix mypy issue

* Update readme

Co-authored-by: Dmitry Pigasin <dmitry.pigasin@intel.com>
This commit is contained in:
Dmitry Pigasin
2021-09-17 11:59:49 +03:00
committed by GitHub
parent ac8db25864
commit a6bdb8744f
3 changed files with 62 additions and 24 deletions

View File

@@ -89,15 +89,15 @@ optional arguments:
Path to an .xml file with a trained model (required if
-rg is missing).
-rg IMPORT_GNA_MODEL, --import_gna_model IMPORT_GNA_MODEL
Read GNA model from file using path/filename provided
Read GNA model from file using path/filename provided
(required if -m is missing).
Options:
-h, --help Show this help message and exit.
-i INPUT, --input INPUT
Required. Path to an input file (.ark or .npz).
Required. Path to an input file (.ark or .npz).
-o OUTPUT, --output OUTPUT
Optional. Output file name to save inference results
Optional. Output file name to save inference results
(.ark or .npz).
-r REFERENCE, --reference REFERENCE
Optional. Read reference score file and compare
@@ -117,7 +117,8 @@ Options:
(default 16).
-sf SCALE_FACTOR, --scale_factor SCALE_FACTOR
Optional. The user-specified input scale factor for
quantization.
quantization. If the network contains multiple inputs,
provide scale factors by separating them with commas.
-wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL
Optional. Write GNA model to file using path/filename
provided.
@@ -176,27 +177,30 @@ The sample application logs each step in a standard output stream.
[ INFO ] Creating Inference Engine
[ INFO ] Reading the network: wsj_dnn5b.xml
[ INFO ] Configuring input and output blobs
[ INFO ] Using scale factor of 2175.4322417 calculated from first utterance.
[ INFO ] Using scale factor(s) calculated from first utterance
[ INFO ] For input 0 using scale factor of 2175.4322418
[ INFO ] Loading the model to the plugin
[ INFO ] Starting inference in synchronous mode
[ INFO ] Utterance 0 (4k0c0301)
[ INFO ] Output blob name: affinetransform14/Fused_Add_
[ INFO ] Frames in utterance: 1294
[ INFO ] Total time in Infer (HW and SW): 5305.47ms
[ INFO ] max error: 0.7051839
[ INFO ] avg error: 0.0448387
[ INFO ] avg rms error: 0.0582387
[ INFO ] stdev error: 0.0371649
[ INFO ] Total time in Infer (HW and SW): 6211.45ms
[ INFO ] max error: 0.7051840
[ INFO ] avg error: 0.0448388
[ INFO ] avg rms error: 0.0582387
[ INFO ] stdev error: 0.0371650
[ INFO ]
[ INFO ] Utterance 1 (4k0c0302)
[ INFO ] Output blob name: affinetransform14/Fused_Add_
[ INFO ] Frames in utterance: 1005
[ INFO ] Total time in Infer (HW and SW): 5031.53ms
[ INFO ] Total time in Infer (HW and SW): 4742.27ms
[ INFO ] max error: 0.7575974
[ INFO ] avg error: 0.0452166
[ INFO ] avg rms error: 0.0586013
[ INFO ] stdev error: 0.0372769
[ INFO ]
...
[ INFO ] Total sample time: 38033.09ms
[ INFO ] Total sample time: 40219.99ms
[ INFO ] File result.npz was created!
[ INFO ] This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool
```

View File

@@ -28,8 +28,9 @@ def parse_args() -> argparse.Namespace:
args.add_argument('-bs', '--batch_size', default=1, type=int, help='Optional. Batch size 1-8 (default 1).')
args.add_argument('-qb', '--quantization_bits', default=16, type=int,
help='Optional. Weight bits for quantization: 8 or 16 (default 16).')
args.add_argument('-sf', '--scale_factor', type=float,
help='Optional. The user-specified input scale factor for quantization.')
args.add_argument('-sf', '--scale_factor', type=str,
help='Optional. The user-specified input scale factor for quantization. '
'If the network contains multiple inputs, provide scale factors by separating them with commas.')
args.add_argument('-wg', '--export_gna_model', type=str,
help='Optional. Write GNA model to file using path/filename provided.')
args.add_argument('-we', '--export_embedded_gna_model', type=str, help=argparse.SUPPRESS)

View File

@@ -103,6 +103,32 @@ def get_output_layer_list(net: Union[IENetwork, ExecutableNetwork],
return [list(net.outputs.keys())[-1]]
def parse_scale_factors(args: argparse.Namespace) -> list:
    """Get a list of scale factors for input files.

    Splits the comma-separated ``--input`` and ``--scale_factor`` arguments,
    converts the factors to float, and validates that their counts match and
    that every factor is strictly positive. On validation failure the sample
    logs an error and exits with a distinct negative code.
    """
    input_files = re.split(', |,', args.input)
    scale_factors = re.split(', |,', str(args.scale_factor))
    scale_factors = list(map(float, scale_factors))

    if len(input_files) != len(scale_factors):
        log.error(f'Incorrect command line for multiple inputs: {len(scale_factors)} scale factors provided for '
                  f'{len(input_files)} input files.')
        sys.exit(-7)

    for i, scale_factor in enumerate(scale_factors):
        # The error message requires a strictly positive factor, so reject 0 too
        # (the original `< 0` check let a zero scale factor through). The values
        # are already floats — no second conversion needed.
        if scale_factor <= 0:
            log.error(f'Scale factor for input #{i} (counting from zero) is out of range (must be positive).')
            sys.exit(-8)

    return scale_factors
def set_scale_factors(plugin_config: dict, scale_factors: list):
    """Record one GNA scale factor per network input in the plugin config."""
    for index, factor in enumerate(scale_factors):
        key = f'GNA_SCALE_FACTOR_{index}'
        log.info(f'For input {index} using scale factor of {factor:.7f}')
        plugin_config[key] = str(factor)
def main():
log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
args = parse_args()
@@ -149,16 +175,23 @@ def main():
# Set a GNA scale factor
if args.import_gna_model:
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
elif args.scale_factor:
log.info(f'Using scale factor of {args.scale_factor:.7f} specified by user.')
plugin_config['GNA_SCALE_FACTOR'] = str(args.scale_factor)
if args.scale_factor:
log.warning(f'Custom scale factor will be used for imported GNA model: {args.import_gna_model}')
set_scale_factors(plugin_config, parse_scale_factors(args))
else:
log.info(f'Using scale factor from the imported GNA model: {args.import_gna_model}')
else:
utterances = read_utterance_file(args.input.split(',')[0])
key = sorted(utterances)[0]
scale_factor = get_scale_factor(utterances[key])
log.info(f'Using scale factor of {scale_factor:.7f} calculated from first utterance.')
plugin_config['GNA_SCALE_FACTOR'] = str(scale_factor)
if args.scale_factor:
set_scale_factors(plugin_config, parse_scale_factors(args))
else:
scale_factors = []
for file_name in re.split(', |,', args.input):
first_utterance = next(iter(read_utterance_file(file_name).values()))
scale_factors.append(get_scale_factor(first_utterance))
log.info('Using scale factor(s) calculated from first utterance')
set_scale_factors(plugin_config, scale_factors)
if args.export_embedded_gna_model:
plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model