From 3144c5fab8e7d811da4f0c5a049111ef0c31d645 Mon Sep 17 00:00:00 2001 From: Maxim Gordeev Date: Thu, 17 Feb 2022 16:11:57 +0300 Subject: [PATCH] Added processing of layout for speech sample (#10254) * Added processing of layout for speech sample * fixed notes * some improvements * Code style format * changed NCC value for NullStatement * improved batch processing * added loading batch for imported model * fixed notes * fixed notes * added layout parameter to azure tests --- .../ncc_naming_style/openvino.style | 2 +- samples/cpp/speech_sample/README.md | 3 +- samples/cpp/speech_sample/main.cpp | 54 ++++++++++++++++--- samples/cpp/speech_sample/speech_sample.hpp | 15 ++++-- samples/cpp/speech_sample/utils.hpp | 40 ++++++++++++++ .../smoke_tests/test_speech_sample.py | 1 + 6 files changed, 103 insertions(+), 12 deletions(-) diff --git a/cmake/developer_package/ncc_naming_style/openvino.style b/cmake/developer_package/ncc_naming_style/openvino.style index d858df6b084..acb0a199e5d 100644 --- a/cmake/developer_package/ncc_naming_style/openvino.style +++ b/cmake/developer_package/ncc_naming_style/openvino.style @@ -99,7 +99,7 @@ CxxCatchStatement: '^.*$' CxxTryStatement: '^.*$' CxxForRangeStatement: '^.*$' MsAsmStatement: 'XXXX' -NullStatement: 'XXXX' +NullStatement: '^.*$' DeclarationStatement: '^.*$' TranslationUnit: 'XXXX' UnexposedAttribute: '^.*$' diff --git a/samples/cpp/speech_sample/README.md b/samples/cpp/speech_sample/README.md index 716b90d3b96..74ce696a106 100644 --- a/samples/cpp/speech_sample/README.md +++ b/samples/cpp/speech_sample/README.md @@ -107,7 +107,8 @@ Options: -q "" Optional. Input quantization mode: static (default), dynamic, or user (use with -sf). -qb "" Optional. Weight bits for quantization: 8 or 16 (default) -sf "" Optional. User-specified input scale factor for quantization (use with -q user). If the network contains multiple inputs, provide scale factors by separating them with commas. - -bs "" Optional. 
Batch size 1-8 (default 1) + -bs "" Optional. Batch size 1-8 + -layout "" Optional. Prompts how network layouts should be treated by application. For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size. -r "" Optional. Read reference score file and compare scores. Example of usage: or -rg "" Read GNA model from file using path/filename provided (required if -m is missing). -wg "" Optional. Write GNA model to file using path/filename provided. diff --git a/samples/cpp/speech_sample/main.cpp b/samples/cpp/speech_sample/main.cpp index 0389a412cae..dc5fad5c4b6 100644 --- a/samples/cpp/speech_sample/main.cpp +++ b/samples/cpp/speech_sample/main.cpp @@ -83,7 +83,7 @@ int main(int argc, char* argv[]) { // ------------------------------------- ov::Core core; slog::info << "Loading model files:" << slog::endl << FLAGS_m << slog::endl; - uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : (uint32_t)FLAGS_bs; + uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0 || !FLAGS_bs) ? 
1 : (uint32_t)FLAGS_bs; std::shared_ptr model; std::vector outputs; std::vector ports; @@ -115,16 +115,38 @@ int main(int argc, char* argv[]) { } } check_number_of_inputs(model->inputs().size(), numInputFiles); - const ov::Layout tensor_layout{"NC"}; ov::preprocess::PrePostProcessor proc(model); - for (int i = 0; i < model->inputs().size(); i++) { - proc.input(i).tensor().set_element_type(ov::element::f32).set_layout(tensor_layout); + const auto& inputs = model->inputs(); + std::map custom_layouts; + if (!FLAGS_layout.empty()) { + custom_layouts = parse_input_layouts(FLAGS_layout, inputs); + } + for (const auto& input : inputs) { + const auto& item_name = input.get_any_name(); + auto& in = proc.input(item_name); + in.tensor().set_element_type(ov::element::f32); + // Explicitly set inputs layout + if (custom_layouts.count(item_name) > 0) { + in.model().set_layout(ov::Layout(custom_layouts.at(item_name))); + } } for (int i = 0; i < model->outputs().size(); i++) { proc.output(i).tensor().set_element_type(ov::element::f32); } model = proc.build(); - ov::set_batch(model, batchSize); + if (FLAGS_bs) { + if (FLAGS_layout.empty() && + std::any_of(inputs.begin(), inputs.end(), [](const ov::Output& i) { + return ov::layout::get_layout(i).empty(); + })) { + throw std::logic_error( + "-bs option is set to " + std::to_string(FLAGS_bs) + + " but model does not contain layout information for any input. Please " + "specify it explicitly using -layout option. 
For example, input1[NCHW], input2[NC] or [NC]"); + } else { + ov::set_batch(model, batchSize); + } + } } // ------------------------------ Get Available Devices ------------------------------------------------------ auto isFeature = [&](const std::string xFeature) { @@ -235,6 +257,22 @@ int main(int argc, char* argv[]) { throw std::runtime_error("Cannot open model file " + FLAGS_rg); } executableNet = core.import_model(streamrq, deviceStr, genericPluginConfig); + // loading batch from exported model + const auto& imported_inputs = executableNet.inputs(); + if (std::any_of(imported_inputs.begin(), imported_inputs.end(), [](const ov::Output& i) { + return ov::layout::get_layout(i).empty(); + })) { + slog::warn << "No batch dimension was found at any input, assuming batch to be 1." << slog::endl; + batchSize = 1; + } else { + for (auto& info : imported_inputs) { + auto imported_layout = ov::layout::get_layout(info); + if (ov::layout::has_batch(imported_layout)) { + batchSize = (uint32_t)info.get_shape()[ov::layout::batch_idx(imported_layout)]; + break; + } + } + } } // --------------------------- Exporting gna model using InferenceEngine AOT API--------------------- if (!FLAGS_wg.empty()) { @@ -251,7 +289,8 @@ int main(int argc, char* argv[]) { return 0; } // --------------------------------------------------------------------------------------------------------- - // --------------------------- Step 3. Create infer request -------------------------------------------------- + // --------------------------- Step 3. 
Create infer request + // -------------------------------------------------- std::vector inferRequests(1); for (auto& inferRequest : inferRequests) { @@ -433,7 +472,8 @@ int main(int argc, char* argv[]) { outputBlob = inferRequest.inferRequest.get_tensor(executableNet.output(FLAGS_oname)); } - // locked memory holder should be alive all time while access to its buffer happens + // locked memory holder should be alive all time while access to its buffer + // happens auto byteSize = numScoresPerFrame * sizeof(float); std::memcpy(outputFrame, outputBlob.data(), byteSize); } diff --git a/samples/cpp/speech_sample/speech_sample.hpp b/samples/cpp/speech_sample/speech_sample.hpp index 90322c89d68..f398d709cb3 100644 --- a/samples/cpp/speech_sample/speech_sample.hpp +++ b/samples/cpp/speech_sample/speech_sample.hpp @@ -121,6 +121,11 @@ static const char output_layer_names_message[] = "Optional. Layer names for outp static const char input_layer_names_message[] = "Optional. Layer names for input blobs. " "The names are separated with \",\" " "Example: Input1,Input2 "; +/// @brief message for inputs layer names +static const char layout_message[] = + "Optional. Prompts how network layouts should be treated by application. " + "For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size."; +; /// @brief message for PWL max error percent static const char pwl_max_error_percent_message[] = "Optional. The maximum percent of error for PWL function." 
@@ -176,8 +181,8 @@ DEFINE_int32(qb, 16, quantization_bits_message); /// @brief Scale factor for quantization DEFINE_string(sf, "", scale_factor_message); -/// @brief Batch size (default 1) -DEFINE_int32(bs, 1, batch_size_message); +/// @brief Batch size (default 0) +DEFINE_int32(bs, 0, batch_size_message); /// @brief Number of threads to use for inference on the CPU (also affects Hetero cases) DEFINE_int32(nthreads, 1, infer_num_threads_message); @@ -194,6 +199,9 @@ DEFINE_string(oname, "", output_layer_names_message); /// @brief Input layer name DEFINE_string(iname, "", input_layer_names_message); +/// @brief Input layer name +DEFINE_string(layout, "", layout_message); + /// @brief PWL max error percent DEFINE_double(pwl_me, 1.0, pwl_max_error_percent_message); @@ -223,6 +231,7 @@ static void show_usage() { std::cout << " -cw_r \"\" " << context_window_message_r << std::endl; std::cout << " -oname \"\" " << output_layer_names_message << std::endl; std::cout << " -iname \"\" " << input_layer_names_message << std::endl; + std::cout << " -layout \"\" " << layout_message << std::endl; std::cout << " -pwl_me \"\" " << pwl_max_error_percent_message << std::endl; std::cout << " -exec_target \"\" " << execution_target_message << std::endl; std::cout << " -compile_target \"\" " << compile_target_message << std::endl; @@ -282,7 +291,7 @@ bool parse_and_check_command_line(int argc, char* argv[]) { } uint32_t batchSize = (uint32_t)FLAGS_bs; - if ((batchSize < 1) || (batchSize > 8)) { + if (batchSize && ((batchSize < 1) || (batchSize > 8))) { throw std::logic_error("Batch size out of range (1..8)."); } diff --git a/samples/cpp/speech_sample/utils.hpp b/samples/cpp/speech_sample/utils.hpp index f49f2827dff..a07011e15a6 100644 --- a/samples/cpp/speech_sample/utils.hpp +++ b/samples/cpp/speech_sample/utils.hpp @@ -488,3 +488,43 @@ std::vector convert_str_to_vector(std::string str) { } return blobName; } + +/** + * @brief Parse layout string like "input0[value0],input1[value1]" 
or "[value]" (applied to all inputs) + * @param layout_string input names with layout values + * @param input_info reference to vector of inputs + * @return map of inputs with layout values + */ +std::map parse_input_layouts(const std::string& layout_string, + const std::vector>& input_info) { + // Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all + // inputs) + std::map return_value; + std::string search_string = layout_string; + auto start_pos = search_string.find_first_of('['); + auto input_name = search_string.substr(0, start_pos); + while (start_pos != std::string::npos) { + auto end_pos = search_string.find_first_of(']'); + if (end_pos == std::string::npos) + break; + if (start_pos) + input_name = search_string.substr(0, start_pos); + auto input_value = search_string.substr(start_pos + 1, end_pos - start_pos - 1); + if (!input_name.empty()) { + return_value[input_name] = input_value; + } else { + for (auto& item : input_info) { + return_value[item.get_any_name()] = input_value; + } + } + search_string = search_string.substr(end_pos + 1); + if (search_string.empty() || (search_string.front() != ',' && search_string.front() != '[')) + break; + if (search_string.front() == ',') + search_string = search_string.substr(1); + start_pos = search_string.find_first_of('['); + } + if (!search_string.empty()) + throw std::logic_error("Can't parse input parameter string: " + layout_string); + return return_value; +} diff --git a/tests/samples_tests/smoke_tests/test_speech_sample.py b/tests/samples_tests/smoke_tests/test_speech_sample.py index 840e1541c72..0ee5095d719 100644 --- a/tests/samples_tests/smoke_tests/test_speech_sample.py +++ b/tests/samples_tests/smoke_tests/test_speech_sample.py @@ -22,6 +22,7 @@ log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=s test_data = get_tests(cmd_params={'i': [os.path.join('ark', 'dev93_10.ark')], 'm': [os.path.join('wsj', 'FP32', 'wsj_dnn5b.xml')], + 'layout': 
["[NC]"], 'bs': [1, 2], 'o': ['res_output.ark'], 'r': [os.path.join('ark', 'dev93_scores_10.ark')],