From ce8f164feaaf0756944a0af4dadd8c8d70842925 Mon Sep 17 00:00:00 2001
From: Zlobin Vladimir <vladimir.zlobin@intel.com>
Date: Fri, 14 Jul 2023 19:21:55 +0400
Subject: [PATCH] Fix -api sync for single -data_shape (#18463)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix -api sync for single -data_shape

Tickets 111187 and 111185

I wasn’t able to find a C++ equivalent of Python’s `info.original_shape.is_static`. Later I realized that it shouldn’t be considered, because the -shape command-line argument should have higher priority for shape inference than the model’s own shape. So I removed it from Python.

Replace

`if benchmark.inference_only and batch_size.is_dynamic:`

with

`if allow_inference_only_or_sync and batch_size.is_dynamic:`

to reset batch_size to static in case of dynamic shape with single -data_shape

* Check only app_input_info.size() == 1 because if it's greater than 1, the input shape is dynamic and there is more than one static shape. Apply TODO
---
 samples/cpp/benchmark_app/main.cpp                  |  8 +++++---
 samples/cpp/benchmark_app/utils.cpp                 |  4 ++++
 samples/cpp/benchmark_app/utils.hpp                 |  1 +
 .../benchmark_tool/openvino/tools/benchmark/main.py |  6 +++---
 .../openvino/tools/benchmark/utils/utils.py         | 13 +------------
 5 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/samples/cpp/benchmark_app/main.cpp b/samples/cpp/benchmark_app/main.cpp
index 0ec8267a7c8..2bda41ca8b8 100644
--- a/samples/cpp/benchmark_app/main.cpp
+++ b/samples/cpp/benchmark_app/main.cpp
@@ -811,16 +811,18 @@ int main(int argc, char* argv[]) {
             }
         }
 
-        if (isDynamicNetwork && FLAGS_api == "sync") {
+        bool allow_inference_only_or_sync = can_measure_as_static(app_inputs_info);
+
+        if (!allow_inference_only_or_sync && FLAGS_api == "sync") {
             throw std::logic_error("Benchmarking of the model with dynamic shapes is available for async API only. "
-                                   "Please use -api async -nstreams 1 -nireq 1 to emulate sync behavior");
+                                   "Please use -api async -hint latency -nireq 1 to emulate sync behavior");
         }
 
         // Defining of benchmark mode
         // for static models inference only mode is used as default one
         bool inferenceOnly = FLAGS_inference_only;
         if (isDynamicNetwork) {
-            if (isFlagSetInCommandLine("inference_only") && inferenceOnly && app_inputs_info.size() != 1) {
+            if (isFlagSetInCommandLine("inference_only") && inferenceOnly && !allow_inference_only_or_sync) {
                 throw std::logic_error(
                     "Dynamic models with different input data shapes must be benchmarked only in full mode.");
             }
diff --git a/samples/cpp/benchmark_app/utils.cpp b/samples/cpp/benchmark_app/utils.cpp
index ce13d85ec78..9d3b2661156 100644
--- a/samples/cpp/benchmark_app/utils.cpp
+++ b/samples/cpp/benchmark_app/utils.cpp
@@ -108,6 +108,10 @@ std::vector<float> split_float(const std::string& s, char delim) {
     return result;
 }
 
+bool can_measure_as_static(const std::vector<benchmark_app::InputsInfo>& app_input_info) {
+    return app_input_info.size() == 1;
+}
+
 static const std::vector<std::string> meta_plugins{"MULTI", "HETERO", "AUTO"};
 bool is_virtual_device(const std::string& device_name) {
     return std::find(meta_plugins.begin(), meta_plugins.end(), device_name) != meta_plugins.end();
diff --git a/samples/cpp/benchmark_app/utils.hpp b/samples/cpp/benchmark_app/utils.hpp
index a15c4501ba3..2fa20f040cb 100644
--- a/samples/cpp/benchmark_app/utils.hpp
+++ b/samples/cpp/benchmark_app/utils.hpp
@@ -58,6 +58,7 @@ using InputsInfo = std::map<std::string, InputInfo>;
 using PartialShapes = std::map<std::string, ngraph::PartialShape>;
 }  // namespace benchmark_app
 
+bool can_measure_as_static(const std::vector<benchmark_app::InputsInfo>& app_input_info);
 bool is_virtual_device(const std::string& device_name);
 bool is_virtual_device_found(const std::vector<std::string>& device_names);
 void update_device_properties_setting(const std::string& device_name,
diff --git a/tools/benchmark_tool/openvino/tools/benchmark/main.py b/tools/benchmark_tool/openvino/tools/benchmark/main.py
index 821e81b97d0..47dbd8d9b09 100644
--- a/tools/benchmark_tool/openvino/tools/benchmark/main.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/main.py
@@ -487,8 +487,8 @@ def main():
         static_mode = check_for_static(app_inputs_info)
         allow_inference_only_or_sync = can_measure_as_static(app_inputs_info)
         if not allow_inference_only_or_sync and benchmark.api_type == 'sync':
-            raise Exception("Benchmarking of the model with dynamic shapes is available for async API only."
-                                   "Please use -api async -nstreams 1 -nireq 1 to emulate sync behavior.")
+            raise Exception("Benchmarking of the model with dynamic shapes is available for async API only. "
+                            "Please use -api async -hint latency -nireq 1 to emulate sync behavior.")
 
         if benchmark.inference_only == None:
             if static_mode:
@@ -499,7 +499,7 @@ def main():
             raise Exception("Benchmarking dynamic model available with input filling in measurement loop only!")
 
         # update batch size in case dynamic network with one data_shape
-        if benchmark.inference_only and batch_size.is_dynamic:
+        if allow_inference_only_or_sync and batch_size.is_dynamic:
             batch_size = Dimension(data_queue.batch_sizes[data_queue.current_group_id])
 
         benchmark.latency_groups = get_latency_groups(app_inputs_info)
diff --git a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py
index d4925a7b270..51f88ab9751 100644
--- a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py
@@ -78,19 +78,10 @@ def get_element_type(precision):
 
 
 def fuse_mean_scale(preproc: PrePostProcessor, app_inputs_info):
-    # TODO: remove warning after 23.3 release
-    warned = False
-    warn_msg = 'Mean/scale values are fused into the model. This slows down performance compared to --imean and --iscale which existed before'
     for input_info in app_inputs_info:
         if input_info.mean.size:
-            if not warned:
-                logger.warning(warn_msg)
-                warned = True
             preproc.input(input_info.name).preprocess().convert_element_type(Type.f32).mean(input_info.mean)
         if input_info.scale.size:
-            if not warned:
-                logger.warning(warn_msg)
-                warned = True
             preproc.input(input_info.name).preprocess().convert_element_type(Type.f32).scale(input_info.scale)
 
 
@@ -272,7 +263,7 @@ def check_for_static(app_input_info):
 
 def can_measure_as_static(app_input_info):
     for info in app_input_info:
-        if info.is_dynamic and (len(info.shapes) > 1 or info.original_shape.is_static):
+        if len(info.shapes) > 1:
             return False
     return True
 
@@ -559,7 +550,6 @@ class AppInputInfo:
     def __init__(self):
         self.element_type = None
         self.layout = Layout()
-        self.original_shape = None
         self.partial_shape = None
         self.data_shapes = []
         self.scale = np.empty([0])
@@ -650,7 +640,6 @@ def get_inputs_info(shape_string, data_shape_string, layout_string, batch_size,
         # Input precision
         info.element_type = inputs[i].element_type
         # Shape
-        info.original_shape = inputs[i].partial_shape
         if info.name in shape_map:
             info.partial_shape = PartialShape(shape_map[info.name])
             reshape = True