Added processing of layout for speech sample (#10254)
* Added processing of layout for speech sample
* fixed notes
* some improvements
* Code style format
* changed NCC value for NullStatement
* improved batch processing
* added loading batch for imported model
* fixed notes
* fixed notes
* added layout parameter to azure tests
This commit is contained in:
@@ -107,7 +107,8 @@ Options:
|
||||
-q "<mode>" Optional. Input quantization mode: static (default), dynamic, or user (use with -sf).
|
||||
-qb "<integer>" Optional. Weight bits for quantization: 8 or 16 (default)
|
||||
-sf "<double>" Optional. User-specified input scale factor for quantization (use with -q user). If the network contains multiple inputs, provide scale factors by separating them with commas.
|
||||
-bs "<integer>" Optional. Batch size 1-8 (default 1)
|
||||
-bs "<integer>" Optional. Batch size 1-8
|
||||
-layout "<string>" Optional. Prompts how network layouts should be treated by application. For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.
|
||||
-r "<path>" Optional. Read reference score file and compare scores. Example of usage: <reference.ark> or <reference.npz>
|
||||
-rg "<path>" Read GNA model from file using path/filename provided (required if -m is missing).
|
||||
-wg "<path>" Optional. Write GNA model to file using path/filename provided.
|
||||
|
||||
@@ -83,7 +83,7 @@ int main(int argc, char* argv[]) {
|
||||
// -------------------------------------
|
||||
ov::Core core;
|
||||
slog::info << "Loading model files:" << slog::endl << FLAGS_m << slog::endl;
|
||||
uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : (uint32_t)FLAGS_bs;
|
||||
uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0 || !FLAGS_bs) ? 1 : (uint32_t)FLAGS_bs;
|
||||
std::shared_ptr<ov::Model> model;
|
||||
std::vector<std::string> outputs;
|
||||
std::vector<size_t> ports;
|
||||
@@ -115,16 +115,38 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
}
|
||||
check_number_of_inputs(model->inputs().size(), numInputFiles);
|
||||
const ov::Layout tensor_layout{"NC"};
|
||||
ov::preprocess::PrePostProcessor proc(model);
|
||||
for (int i = 0; i < model->inputs().size(); i++) {
|
||||
proc.input(i).tensor().set_element_type(ov::element::f32).set_layout(tensor_layout);
|
||||
const auto& inputs = model->inputs();
|
||||
std::map<std::string, std::string> custom_layouts;
|
||||
if (!FLAGS_layout.empty()) {
|
||||
custom_layouts = parse_input_layouts(FLAGS_layout, inputs);
|
||||
}
|
||||
for (const auto& input : inputs) {
|
||||
const auto& item_name = input.get_any_name();
|
||||
auto& in = proc.input(item_name);
|
||||
in.tensor().set_element_type(ov::element::f32);
|
||||
// Explicitly set inputs layout
|
||||
if (custom_layouts.count(item_name) > 0) {
|
||||
in.model().set_layout(ov::Layout(custom_layouts.at(item_name)));
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < model->outputs().size(); i++) {
|
||||
proc.output(i).tensor().set_element_type(ov::element::f32);
|
||||
}
|
||||
model = proc.build();
|
||||
ov::set_batch(model, batchSize);
|
||||
if (FLAGS_bs) {
|
||||
if (FLAGS_layout.empty() &&
|
||||
std::any_of(inputs.begin(), inputs.end(), [](const ov::Output<ov::Node>& i) {
|
||||
return ov::layout::get_layout(i).empty();
|
||||
})) {
|
||||
throw std::logic_error(
|
||||
"-bs option is set to " + std::to_string(FLAGS_bs) +
|
||||
" but model does not contain layout information for any input. Please "
|
||||
"specify it explicitly using -layout option. For example, input1[NCHW], input2[NC] or [NC]");
|
||||
} else {
|
||||
ov::set_batch(model, batchSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
// ------------------------------ Get Available Devices ------------------------------------------------------
|
||||
auto isFeature = [&](const std::string xFeature) {
|
||||
@@ -235,6 +257,22 @@ int main(int argc, char* argv[]) {
|
||||
throw std::runtime_error("Cannot open model file " + FLAGS_rg);
|
||||
}
|
||||
executableNet = core.import_model(streamrq, deviceStr, genericPluginConfig);
|
||||
// loading batch from exported model
|
||||
const auto& imported_inputs = executableNet.inputs();
|
||||
if (std::any_of(imported_inputs.begin(), imported_inputs.end(), [](const ov::Output<const ov::Node>& i) {
|
||||
return ov::layout::get_layout(i).empty();
|
||||
})) {
|
||||
slog::warn << "No batch dimension was found at any input, assuming batch to be 1." << slog::endl;
|
||||
batchSize = 1;
|
||||
} else {
|
||||
for (auto& info : imported_inputs) {
|
||||
auto imported_layout = ov::layout::get_layout(info);
|
||||
if (ov::layout::has_batch(imported_layout)) {
|
||||
batchSize = (uint32_t)info.get_shape()[ov::layout::batch_idx(imported_layout)];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// --------------------------- Exporting gna model using InferenceEngine AOT API---------------------
|
||||
if (!FLAGS_wg.empty()) {
|
||||
@@ -251,7 +289,8 @@ int main(int argc, char* argv[]) {
|
||||
return 0;
|
||||
}
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
// --------------------------- Step 3. Create infer request --------------------------------------------------
|
||||
// --------------------------- Step 3. Create infer request
|
||||
// --------------------------------------------------
|
||||
std::vector<InferRequestStruct> inferRequests(1);
|
||||
|
||||
for (auto& inferRequest : inferRequests) {
|
||||
@@ -433,7 +472,8 @@ int main(int argc, char* argv[]) {
|
||||
outputBlob =
|
||||
inferRequest.inferRequest.get_tensor(executableNet.output(FLAGS_oname));
|
||||
}
|
||||
// locked memory holder should be alive all time while access to its buffer happens
|
||||
// locked memory holder should be alive all time while access to its buffer
|
||||
// happens
|
||||
auto byteSize = numScoresPerFrame * sizeof(float);
|
||||
std::memcpy(outputFrame, outputBlob.data<float>(), byteSize);
|
||||
}
|
||||
|
||||
@@ -121,6 +121,11 @@ static const char output_layer_names_message[] = "Optional. Layer names for outp
|
||||
static const char input_layer_names_message[] = "Optional. Layer names for input blobs. "
|
||||
"The names are separated with \",\" "
|
||||
"Example: Input1,Input2 ";
|
||||
/// @brief message for the inputs layout option (-layout)
static const char layout_message[] =
    "Optional. Prompts how network layouts should be treated by application. "
    "For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.";
|
||||
|
||||
/// @brief message for PWL max error percent
|
||||
static const char pwl_max_error_percent_message[] = "Optional. The maximum percent of error for PWL function."
|
||||
@@ -176,8 +181,8 @@ DEFINE_int32(qb, 16, quantization_bits_message);
|
||||
/// @brief Scale factor for quantization
|
||||
DEFINE_string(sf, "", scale_factor_message);
|
||||
|
||||
/// @brief Batch size (default 1)
|
||||
DEFINE_int32(bs, 1, batch_size_message);
|
||||
/// @brief Batch size (default 0)
|
||||
DEFINE_int32(bs, 0, batch_size_message);
|
||||
|
||||
/// @brief Number of threads to use for inference on the CPU (also affects Hetero cases)
|
||||
DEFINE_int32(nthreads, 1, infer_num_threads_message);
|
||||
@@ -194,6 +199,9 @@ DEFINE_string(oname, "", output_layer_names_message);
|
||||
/// @brief Input layer name
|
||||
DEFINE_string(iname, "", input_layer_names_message);
|
||||
|
||||
/// @brief Input layer name
|
||||
DEFINE_string(layout, "", layout_message);
|
||||
|
||||
/// @brief PWL max error percent
|
||||
DEFINE_double(pwl_me, 1.0, pwl_max_error_percent_message);
|
||||
|
||||
@@ -223,6 +231,7 @@ static void show_usage() {
|
||||
std::cout << " -cw_r \"<integer>\" " << context_window_message_r << std::endl;
|
||||
std::cout << " -oname \"<string>\" " << output_layer_names_message << std::endl;
|
||||
std::cout << " -iname \"<string>\" " << input_layer_names_message << std::endl;
|
||||
std::cout << " -layout \"<string>\" " << layout_message << std::endl;
|
||||
std::cout << " -pwl_me \"<double>\" " << pwl_max_error_percent_message << std::endl;
|
||||
std::cout << " -exec_target \"<string>\" " << execution_target_message << std::endl;
|
||||
std::cout << " -compile_target \"<string>\" " << compile_target_message << std::endl;
|
||||
@@ -282,7 +291,7 @@ bool parse_and_check_command_line(int argc, char* argv[]) {
|
||||
}
|
||||
|
||||
uint32_t batchSize = (uint32_t)FLAGS_bs;
|
||||
if ((batchSize < 1) || (batchSize > 8)) {
|
||||
if (batchSize && ((batchSize < 1) || (batchSize > 8))) {
|
||||
throw std::logic_error("Batch size out of range (1..8).");
|
||||
}
|
||||
|
||||
|
||||
@@ -488,3 +488,43 @@ std::vector<std::string> convert_str_to_vector(std::string str) {
|
||||
}
|
||||
return blobName;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Parse layout string like "input0[value0],input1[value1]" or "[value]" (applied to all inputs)
|
||||
* @param layout_string input names with layout values
|
||||
* @param input_info reference to vector of inputs
|
||||
* @return map of inputs with layout values
|
||||
*/
|
||||
std::map<std::string, std::string> parse_input_layouts(const std::string& layout_string,
|
||||
const std::vector<ov::Output<ov::Node>>& input_info) {
|
||||
// Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all
|
||||
// inputs)
|
||||
std::map<std::string, std::string> return_value;
|
||||
std::string search_string = layout_string;
|
||||
auto start_pos = search_string.find_first_of('[');
|
||||
auto input_name = search_string.substr(0, start_pos);
|
||||
while (start_pos != std::string::npos) {
|
||||
auto end_pos = search_string.find_first_of(']');
|
||||
if (end_pos == std::string::npos)
|
||||
break;
|
||||
if (start_pos)
|
||||
input_name = search_string.substr(0, start_pos);
|
||||
auto input_value = search_string.substr(start_pos + 1, end_pos - start_pos - 1);
|
||||
if (!input_name.empty()) {
|
||||
return_value[input_name] = input_value;
|
||||
} else {
|
||||
for (auto& item : input_info) {
|
||||
return_value[item.get_any_name()] = input_value;
|
||||
}
|
||||
}
|
||||
search_string = search_string.substr(end_pos + 1);
|
||||
if (search_string.empty() || (search_string.front() != ',' && search_string.front() != '['))
|
||||
break;
|
||||
if (search_string.front() == ',')
|
||||
search_string = search_string.substr(1);
|
||||
start_pos = search_string.find_first_of('[');
|
||||
}
|
||||
if (!search_string.empty())
|
||||
throw std::logic_error("Can't parse input parameter string: " + layout_string);
|
||||
return return_value;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user