Used new config for streams and threads (#10150)

* Used new config for streams and threads * Fixed review comments in ba * format fix * fixed hello_query_device * Added STL string io * fixed tests * Fixed test * Fixed build * fixed format * Fixed build * try fix win * other any io specialization * Fixed after merge * renamed streams * build fixed * fixed build * fixed format * fix for old mac build * Fixed type of exception * test fix
2022-02-11 09:22:45 +03:00
parent 437bc3280d
commit 1621a5a0b5
35 changed files with 908 additions and 690 deletions
--- a/samples/cpp/benchmark_app/README.md
+++ b/samples/cpp/benchmark_app/README.md
@@ -124,12 +124,12 @@ Options:
                                Also, using nstreams>1 is inherently throughput-oriented option, while for the best-latency
                                estimations the number of streams should be set to 1.
    -nthreads "<integer>"       Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
-    -enforcebf16="<true/false>" Optional. By default floating point operations execution in bfloat16 precision are enforced if supported by platform.
-    -pin "YES"/"HYBRID_AWARE"/"NUMA"/"NO"
+    -pin ("YES"|"CORE")/"HYBRID_AWARE"/"NUMA"/("NO"|"NONE")
                                Optional. Explicit inference threads binding options (leave empty to let the OpenVINO to make a choice):
                                enabling threads->cores pinning ("YES", which is already default for a conventional CPU),
                                letting the runtime to decide on the threads->different core types ("HYBRID_AWARE", which is default on the hybrid CPUs)
                                threads->(NUMA)nodes ("NUMA") or completely disable ("NO") CPU inference threads pinning.
+    -infer_precision device_name:infer_precision1,device_name:infer_precision2 Optional. Hint that specifies the inference precision
    -ip "U8"/"FP16"/"FP32"      Optional. Specifies precision for all input layers of the network.
    -op "U8"/"FP16"/"FP32"      Optional. Specifies precision for all output layers of the network.
    -iop                        Optional. Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required.
--- a/samples/cpp/benchmark_app/benchmark_app.hpp
+++ b/samples/cpp/benchmark_app/benchmark_app.hpp
@@ -191,8 +191,8 @@ static const char cache_dir_message[] = "Optional. Enables caching of loaded mod
 static const char load_from_file_message[] = "Optional. Loads model from file directly without ReadNetwork."
                                             " All CNNNetwork options (like re-shape) will be ignored";

-// @brief message for quantization bits
-static const char gna_qb_message[] = "Optional. Weight bits for quantization:  8 or 16 (default)";
+// @brief message for inference_precision
+static const char inference_precision_message[] = "Optional. Inference precision";

 static constexpr char inputs_precision_message[] = "Optional. Specifies precision for all input layers of the network.";

@@ -275,9 +275,6 @@ DEFINE_string(nstreams, "", infer_num_streams_message);
 /// @brief The percentile which will be reported in latency metric
 DEFINE_uint32(latency_percentile, 50, infer_latency_percentile_message);

-/// @brief Enforces bf16 execution with bfloat16 precision on systems having this capability
-DEFINE_bool(enforcebf16, false, enforce_bf16_message);
-
 /// @brief Define parameter for batch size <br>
 /// Default is 0 (that means don't specify)
 DEFINE_uint32(b, 0, batch_size_message);
@@ -329,8 +326,8 @@ DEFINE_string(data_shape, "", data_shape_message);
 /// @brief Define flag for layout shape <br>
 DEFINE_string(layout, "", layout_message);

-/// @brief Define flag for quantization bits (default 16)
-DEFINE_int32(qb, 16, gna_qb_message);
+/// @brief Define flag for inference precision
+DEFINE_string(infer_precision, "f32", inference_precision_message);

 /// @brief Specify precision for all input layers of the network
 DEFINE_string(ip, "", inputs_precision_message);
@@ -391,7 +388,6 @@ static void show_usage() {
    std::cout << std::endl << "  device-specific performance options:" << std::endl;
    std::cout << "    -nstreams \"<integer>\"     " << infer_num_streams_message << std::endl;
    std::cout << "    -nthreads \"<integer>\"     " << infer_num_threads_message << std::endl;
-    std::cout << "    -enforcebf16=<true/false>     " << enforce_bf16_message << std::endl;
    std::cout << "    -pin \"YES\"/\"HYBRID_AWARE\"/\"NO\"/\"NUMA\"   " << infer_threads_pinning_message << std::endl;
 #ifdef HAVE_DEVICE_MEM_SUPPORT
    std::cout << "    -use_device_mem           " << use_device_mem_message << std::endl;
@@ -405,7 +401,7 @@ static void show_usage() {
    std::cout << "    -pcseq                    " << pcseq_message << std::endl;
    std::cout << "    -dump_config              " << dump_config_message << std::endl;
    std::cout << "    -load_config              " << load_config_message << std::endl;
-    std::cout << "    -qb                       " << gna_qb_message << std::endl;
+    std::cout << "    -infer_precision \"<element type>\"" << inference_precision_message << std::endl;
    std::cout << "    -ip                          <value>     " << inputs_precision_message << std::endl;
    std::cout << "    -op                          <value>     " << outputs_precision_message << std::endl;
    std::cout << "    -iop                        \"<value>\"    " << iop_message << std::endl;
--- a/samples/cpp/benchmark_app/main.cpp
+++ b/samples/cpp/benchmark_app/main.cpp
@@ -157,7 +157,9 @@ int main(int argc, char* argv[]) {
        auto devices = parse_devices(device_name);

        // Parse nstreams per device
-        std::map<std::string, std::string> device_nstreams = parse_nstreams_value_per_device(devices, FLAGS_nstreams);
+        std::map<std::string, std::string> device_nstreams = parse_value_per_device(devices, FLAGS_nstreams);
+        std::map<std::string, std::string> device_infer_precision =
+            parse_value_per_device(devices, FLAGS_infer_precision);

        // Load device config file if specified
        std::map<std::string, ov::AnyMap> config;
@@ -243,9 +245,7 @@ int main(int argc, char* argv[]) {
        bool perf_counts = false;
        // Update config per device according to command line parameters
        for (auto& device : devices) {
-            if (!config.count(device))
-                config[device] = {};
-            auto& device_config = config.at(device);
+            auto& device_config = config[device];

            // high-level performance modes
            if (ov_perf_hint != ov::hint::PerformanceMode::UNDEFINED) {
@@ -276,24 +276,28 @@ int main(int argc, char* argv[]) {
            }
            perf_counts = (device_config.at(ov::enable_profiling.name()).as<bool>()) ? true : perf_counts;

+            auto supported_properties = core.get_property(device, ov::supported_properties);
+
+            auto supported = [&](const std::string& key) {
+                return std::find(std::begin(supported_properties), std::end(supported_properties), key) !=
+                       std::end(supported_properties);
+            };
            // the rest are individual per-device settings (overriding the values set with perf modes)
            auto setThroughputStreams = [&]() {
                std::string key = getDeviceTypeFromName(device) + "_THROUGHPUT_STREAMS";
-                if (device_nstreams.count(device)) {
+                auto it_device_nstreams = device_nstreams.find(device);
+                if (it_device_nstreams != device_nstreams.end()) {
                    // set to user defined value
                    auto supported_properties = core.get_property(device, ov::supported_properties);
-                    if (std::find(supported_properties.begin(), supported_properties.end(), key) !=
-                        supported_properties.end()) {
-                        device_config[key] = device_nstreams.at(device);
-                    } else if (std::find(supported_properties.begin(),
-                                         supported_properties.end(),
-                                         ov::streams::num.name()) != supported_properties.end()) {
+                    if (supported(key)) {
+                        device_config[key] = it_device_nstreams->second;
+                    } else if (supported(ov::num_streams.name())) {
                        // Use API 2.0 key for streams
-                        key = ov::streams::num.name();
-                        device_config[key] = device_nstreams.at(device);
+                        key = ov::num_streams.name();
+                        device_config[key] = it_device_nstreams->second;
                    } else {
                        throw std::logic_error("Device " + device + " doesn't support config key '" + key + "' " +
-                                               "and '" + ov::streams::num.name() + "'!" +
+                                               "and '" + ov::num_streams.name() + "'!" +
                                               "Please specify -nstreams for correct devices in format  "
                                               "<dev1>:<nstreams1>,<dev2>:<nstreams2>" +
                                               " or via configuration file.");
@@ -309,45 +313,66 @@ int main(int argc, char* argv[]) {
                               << slog::endl;
                    if (std::string::npos == device.find("MYRIAD")) {  // MYRIAD sets the default number of
                                                                       // streams implicitly (without _AUTO)
-                        auto supported_properties = core.get_property(device, ov::supported_properties);
-                        if (std::find(supported_properties.begin(), supported_properties.end(), key) !=
-                            supported_properties.end()) {
+                        if (supported(key)) {
                            device_config[key] = std::string(getDeviceTypeFromName(device) + "_THROUGHPUT_AUTO");
-                        } else if (std::find(supported_properties.begin(),
-                                             supported_properties.end(),
-                                             ov::streams::num.name()) != supported_properties.end()) {
+                        } else if (supported(ov::num_streams.name())) {
                            // Use API 2.0 key for streams
-                            key = ov::streams::num.name();
-                            device_config[key] = std::to_string(ov::streams::AUTO);
+                            key = ov::num_streams.name();
+                            device_config[key] = ov::NumStreams::AUTO;
                        }
                    }
                }
-                if (device_config.count(key))
-                    device_nstreams[device] = device_config.at(key).as<std::string>();
+                auto it_streams = device_config.find(ov::num_streams.name());
+                if (it_streams != device_config.end())
+                    device_nstreams[device] = it_streams->second.as<std::string>();
            };

+            auto set_infer_precision = [&] {
+                auto it_device_infer_precision = device_infer_precision.find(device);
+                if (it_device_infer_precision != device_infer_precision.end()) {
+                    // set to user defined value
+                    if (!supported(ov::hint::inference_precision.name())) {
+                        throw std::logic_error("Device " + device + " doesn't support config key '" +
+                                               ov::hint::inference_precision.name() + "'! " +
+                                               "Please specify -infer_precision for correct devices in format  "
+                                               "<dev1>:<infer_precision1>,<dev2>:<infer_precision2>" +
+                                               " or via configuration file.");
+                    }
+                    device_config.emplace(ov::hint::inference_precision(it_device_infer_precision->second));
+                }
+            };
+
+            auto fix_pin_option = [](const std::string& str) -> std::string {
+                if (str == "NO")
+                    return "NONE";
+                else if (str == "YES")
+                    return "CORE";
+                else
+                    return str;
+            };
+
+            if (supported(ov::inference_num_threads.name()) && isFlagSetInCommandLine("nthreads")) {
+                device_config.emplace(ov::inference_num_threads(FLAGS_nthreads));
+            }
+            if (supported(ov::affinity.name()) && isFlagSetInCommandLine("pin")) {
+                device_config.emplace(ov::affinity(fix_pin_option(FLAGS_pin)));
+            }
+
            if (device.find("CPU") != std::string::npos) {  // CPU supports few special performance-oriented keys
                // limit threading for CPU portion of inference
-                if (isFlagSetInCommandLine("nthreads"))
-                    device_config[CONFIG_KEY(CPU_THREADS_NUM)] = std::to_string(FLAGS_nthreads);
-
-                if (isFlagSetInCommandLine("enforcebf16"))
-                    device_config[CONFIG_KEY(ENFORCE_BF16)] = FLAGS_enforcebf16 ? CONFIG_VALUE(YES) : CONFIG_VALUE(NO);
-
-                if (isFlagSetInCommandLine("pin")) {
-                    // set to user defined value
-                    device_config[CONFIG_KEY(CPU_BIND_THREAD)] = FLAGS_pin;
-                } else if (!device_config.count(CONFIG_KEY(CPU_BIND_THREAD))) {
-                    if ((device_name.find("MULTI") != std::string::npos) &&
+                if (!isFlagSetInCommandLine("pin")) {
+                    auto it_affinity = device_config.find(ov::affinity.name());
+                    if (it_affinity != device_config.end() && (device_name.find("MULTI") != std::string::npos) &&
                        (device_name.find("GPU") != std::string::npos)) {
                        slog::warn << "Turn off threads pinning for " << device
                                   << " device since multi-scenario with GPU device is used." << slog::endl;
-                        device_config[CONFIG_KEY(CPU_BIND_THREAD)] = CONFIG_VALUE(NO);
+                        it_affinity->second = ov::Affinity::NONE;
                    }
                }

                // for CPU execution, more throughput-oriented execution via streams
                setThroughputStreams();
+                set_infer_precision();
            } else if (device.find("GPU") != std::string::npos) {
                // for GPU execution, more throughput-oriented execution via streams
                setThroughputStreams();
@@ -365,25 +390,7 @@ int main(int argc, char* argv[]) {
                device_config.emplace(ov::log::level(ov::log::Level::WARNING));
                setThroughputStreams();
            } else if (device.find("GNA") != std::string::npos) {
-                if (FLAGS_qb == 8)
-                    device_config[GNA_CONFIG_KEY(PRECISION)] = "I8";
-                else
-                    device_config[GNA_CONFIG_KEY(PRECISION)] = "I16";
-            } else {
-                auto supported_properties = core.get_property(device, ov::supported_properties);
-                auto supported = [&](const std::string& key) {
-                    return std::find(std::begin(supported_properties), std::end(supported_properties), key) !=
-                           std::end(supported_properties);
-                };
-                if (supported(CONFIG_KEY(CPU_THREADS_NUM)) && isFlagSetInCommandLine("nthreads")) {
-                    device_config[CONFIG_KEY(CPU_THREADS_NUM)] = std::to_string(FLAGS_nthreads);
-                }
-                if (supported(CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) && isFlagSetInCommandLine("nstreams")) {
-                    device_config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = FLAGS_nstreams;
-                }
-                if (supported(CONFIG_KEY(CPU_BIND_THREAD)) && isFlagSetInCommandLine("pin")) {
-                    device_config[CONFIG_KEY(CPU_BIND_THREAD)] = FLAGS_pin;
-                }
+                set_infer_precision();
            }
        }

@@ -669,7 +676,7 @@ int main(int argc, char* argv[]) {
                const std::string key = getDeviceTypeFromName(ds.first) + "_THROUGHPUT_STREAMS";
                device_nstreams[ds.first] = core.get_property(ds.first, key).as<std::string>();
            } catch (const ov::Exception&) {
-                device_nstreams[ds.first] = core.get_property(ds.first, ov::streams::num.name()).as<std::string>();
+                device_nstreams[ds.first] = core.get_property(ds.first, ov::num_streams.name()).as<std::string>();
            }
        }

--- a/samples/cpp/benchmark_app/utils.cpp
+++ b/samples/cpp/benchmark_app/utils.cpp
@@ -118,8 +118,8 @@ std::vector<std::string> parse_devices(const std::string& device_string) {
    return devices;
 }

-std::map<std::string, std::string> parse_nstreams_value_per_device(const std::vector<std::string>& devices,
-                                                                   const std::string& values_string) {
+std::map<std::string, std::string> parse_value_per_device(const std::vector<std::string>& devices,
+                                                          const std::string& values_string) {
    //  Format: <device1>:<value1>,<device2>:<value2> or just <value>
    std::map<std::string, std::string> result;
    auto device_value_strings = split(values_string, ',');
--- a/samples/cpp/benchmark_app/utils.hpp
+++ b/samples/cpp/benchmark_app/utils.hpp
@@ -56,8 +56,8 @@ using PartialShapes = std::map<std::string, ngraph::PartialShape>;

 std::vector<std::string> parse_devices(const std::string& device_string);
 uint32_t device_default_device_duration_in_seconds(const std::string& device);
-std::map<std::string, std::string> parse_nstreams_value_per_device(const std::vector<std::string>& devices,
-                                                                   const std::string& values_string);
+std::map<std::string, std::string> parse_value_per_device(const std::vector<std::string>& devices,
+                                                          const std::string& values_string);
 std::string get_shape_string(const ov::Shape& shape);
 std::string get_shapes_string(const benchmark_app::PartialShapes& shapes);
 size_t get_batch_size(const benchmark_app::InputsInfo& inputs_info);