WIP: moving the auto-batching to the icore to let MULTI/AUTO support that; ALLOW_AUTO_BATCHING as a conventional config key. Still fails hot device swap.
parent ac21d71321
commit 776624ff2a
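For context, the new key is meant to be set like any other load-time config option. A minimal usage sketch, assuming the conventional stringized key/value forms; "model.xml" is a placeholder path, not part of this commit:

    #include <ie_core.hpp>
    #include <map>
    #include <string>

    int main() {
        InferenceEngine::Core core;
        auto network = core.ReadNetwork("model.xml");  // placeholder model path
        // The conventional key this commit introduces; YES lets the core wrap
        // the target device with the BATCH plugin when a batch size > 1 pays off.
        std::map<std::string, std::string> config = {{"ALLOW_AUTO_BATCHING", "YES"}};
        auto execNet = core.LoadNetwork(network, "GPU", config);
        return 0;
    }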
@@ -244,6 +244,10 @@ DECLARE_CONFIG_VALUE(THROUGHPUT);
  * usually this value comes from the actual use-case (e.g. number of video-cameras, or other sources of inputs)
  */
 DECLARE_CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS);
+/**
+ * @brief (Optional) config key that governs Auto-Batching
+ */
+DECLARE_CONFIG_KEY(ALLOW_AUTO_BATCHING);
 
 /**
  * @brief generic boolean values
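For readers outside the codebase: DECLARE_CONFIG_KEY follows the usual InferenceEngine convention of stringizing the identifier, which is what makes ALLOW_AUTO_BATCHING usable as a plain string map key. A simplified sketch of that convention (the real macros live in the plugin-config headers and also handle declaration/definition details):

    // Simplified stand-ins for the real macros:
    #define DECLARE_CONFIG_KEY(name) static constexpr auto KEY_##name = #name
    #define CONFIG_KEY(name) KEY_##name

    DECLARE_CONFIG_KEY(ALLOW_AUTO_BATCHING);
    // Now CONFIG_KEY(ALLOW_AUTO_BATCHING) == "ALLOW_AUTO_BATCHING"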
@@ -11,6 +11,7 @@
 #include <mutex>
 #include <string>
 #include <vector>
+#include <ie_performance_hints.hpp>
 
 #include "cnn_network_ngraph_impl.hpp"
 #include "compilation_context.hpp"
@@ -41,6 +42,7 @@
 #include "xml_parse_utils.h"
 
+using namespace InferenceEngine::PluginConfigParams;
 using namespace InferenceEngine;
 using namespace std::placeholders;
 
 namespace ov {
@@ -489,11 +491,60 @@ public:
     }
 
     ie::SoExecutableNetworkInternal LoadNetwork(const ie::CNNNetwork& network,
-                                                const std::string& deviceName,
+                                                const std::string& deviceNameOrig,
                                                 const std::map<std::string, std::string>& config) override {
         OV_ITT_SCOPE(FIRST_INFERENCE, ie::itt::domains::IE_LT, "Core::LoadNetwork::CNN");
-        bool forceDisableCache = config.count(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE)) > 0;
-        auto parsed = parseDeviceNameIntoConfig(deviceName, config);
+        std::string deviceName = deviceNameOrig;
+        std::map<std::string, std::string> config_with_batch = config;
+
+        const auto& batch_mode = config_with_batch.find(CONFIG_KEY(ALLOW_AUTO_BATCHING));
+        if (batch_mode != config_with_batch.end() && batch_mode->second == CONFIG_VALUE(YES)) {
+            std::map<std::string, ie::Parameter> options;
+            options["MODEL_ADDRESS"] = &network;
+            auto optimalBatchSize = GetCPPPluginByName(DeviceIDParser(deviceNameOrig).getDeviceName()).
+                    get_metric(METRIC_KEY(OPTIMAL_BATCH), options).as<unsigned int>();
+            const auto& reqs = config.find(KEY_PERFORMANCE_HINT_NUM_REQUESTS);
+            if (reqs != config.end()) {
+                auto r = (uint)PerfHintsConfig::CheckPerformanceHintRequestValue(reqs->second);
+                std::cout << "!!!!!!!!!!!!!!!Detected reqs_limitation: " << r << std::endl;
+                optimalBatchSize = std::min(r, optimalBatchSize);
+            }
+            auto function = network.getFunction();
+            bool bDetectionOutput = false;
+            for (auto&& node : function->get_ops()) {
+                auto isDetectionOutputParent = [](decltype(node)& nd) {
+                    for (size_t n = 0; n < nd->get_input_size(); n++) {
+                        if (!std::strcmp("DetectionOutput", nd->get_input_node_ptr(n)->get_type_info().name))
+                            return true;
+                    }
+                    return false;
+                };
+
+                if (!std::strcmp("DetectionOutput", node->get_type_info().name) ||
+                    (!std::strcmp("Result", node->get_type_info().name) && isDetectionOutputParent(node))) {
+                    node->get_rt_info()["affinity"] = std::make_shared<ngraph::VariantWrapper<std::string>>(deviceNameOrig);
+                    std::cout << "!!! AFF !!! type: " << node->get_type_info().name
+                              << ", name: " << node->get_friendly_name() << std::endl;
+                    bDetectionOutput = true;
+                } else {
+                    node->get_rt_info()["affinity"] = std::make_shared<ngraph::VariantWrapper<std::string>>("BATCH");
+                }
+            }
+            if (optimalBatchSize > 1) {
+                if (bDetectionOutput) {
+                    deviceName = "HETERO:BATCH," + deviceNameOrig;
+                    std::cout << "HETERO code path!!!!" << std::endl;
+                    // config["AUTO_BATCH"] = deviceNameOrig + "(" + std::to_string(optimalBatchSize) + ")";
+                    SetConfigForPlugins({{"AUTO_BATCH", deviceNameOrig + "(" + std::to_string(optimalBatchSize) + ")"}}, "BATCH");
+                } else {
+                    std::string deviceBatch = "BATCH:" + deviceNameOrig + "(" + std::to_string(optimalBatchSize) + ")";
+                    deviceName = deviceBatch;
+                }
+            }
+            config_with_batch.erase(batch_mode);
+        }
+        bool forceDisableCache = config_with_batch.count(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE)) > 0;
+        auto parsed = parseDeviceNameIntoConfig(deviceName, config_with_batch);
         if (forceDisableCache) {
             // remove this config key from parsed as plugins can throw unsupported exception
             parsed._config.erase(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE));
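The net effect of the hunk above is the choice of a virtual device name: a plain BATCH wrapper in the common case, or a HETERO split when a DetectionOutput (or a Result fed by one) pins part of the graph to the original device via rt_info affinities. A condensed, self-contained sketch of that naming step (the helper name is ours, not the diff's; in the hunk it only runs when optimalBatchSize > 1):

    #include <string>

    // "BATCH" is the auto-batching plugin; "HETERO" dispatches ops by the
    // "affinity" tags written into each node's rt_info in the hunk above.
    std::string makeBatchDevice(const std::string& device,
                                unsigned int optimalBatchSize,
                                bool hasDetectionOutput) {
        if (hasDetectionOutput)  // batch size reaches BATCH via SetConfigForPlugins instead
            return "HETERO:BATCH," + device;
        return "BATCH:" + device + "(" + std::to_string(optimalBatchSize) + ")";
    }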
@@ -1147,49 +1198,15 @@ CNNNetwork Core::ReadNetwork(const std::string& model, const Blob::CPtr& weights
 }
 
 ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network,
-                                    const std::string& deviceNameOrig,
+                                    const std::string& deviceName,
                                     const std::map<std::string, std::string>& config) {
-    auto deviceName = deviceNameOrig;
-    if (deviceNameOrig.find("GPU") != std::string::npos) {
-        std::map<std::string, Parameter> options;
-        options["MODEL_ADDRESS"] = &network;
-        auto optimalBatchSize =
-            _impl->GetCPPPluginByName(DeviceIDParser(deviceName).getDeviceName()).
-                get_metric(METRIC_KEY(OPTIMAL_BATCH), options).as<unsigned int>();
-        auto function = network.getFunction();
-        bool bDetectionOutput = false;
-        for (auto&& node : function->get_ops()) {
-            auto isDetectionOutputParent = [](decltype(node)& nd) {
-                for (size_t n = 0; n < nd->get_input_size(); n++) {
-                    if (!std::strcmp("DetectionOutput", nd->get_input_node_ptr(n)->get_type_info().name))
-                        return true;
-                }
-                return false;
-            };
+    std::map<std::string, std::string> config_with_batch = config;
+    // const auto& mode = config.find(KEY_PERFORMANCE_HINT);
+    // if (mode != config.end() && mode->second == CONFIG_VALUE(THROUGHPUT) && deviceName.find("GPU") != std::string::npos)
+    if (deviceName.find("GPU") != std::string::npos)
+        config_with_batch[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(YES);
 
-            if (!std::strcmp("DetectionOutput", node->get_type_info().name) ||
-                (!std::strcmp("Result", node->get_type_info().name) && isDetectionOutputParent(node))) {
-                node->get_rt_info()["affinity"] = std::make_shared<ngraph::VariantWrapper<std::string>>(deviceNameOrig);
-                std::cout << "!!! AFF !!! type: " << node->get_type_info().name
-                          << ", name: " << node->get_friendly_name() << std::endl;
-                bDetectionOutput = true;
-            } else {
-                node->get_rt_info()["affinity"] = std::make_shared<ngraph::VariantWrapper<std::string>>("BATCH");
-            }
-        }
-        if (optimalBatchSize > 1) {
-            if (bDetectionOutput) {
-                deviceName = "HETERO:BATCH," + deviceNameOrig;
-                std::cout << "HETERO code path!!!!" << std::endl;
-                // config["AUTO_BATCH"] = deviceNameOrig + "(" + std::to_string(optimalBatchSize) + ")";
-                SetConfig({{"AUTO_BATCH", deviceNameOrig + "(" + std::to_string(optimalBatchSize) + ")"}}, "BATCH");
-            } else {
-                std::string deviceBatch = "BATCH:" + deviceNameOrig + "(" + std::to_string(optimalBatchSize) + ")";
-                deviceName = deviceBatch;
-            }
-        }
-    }
-    auto exec = _impl->LoadNetwork(network, deviceName, config);
+    auto exec = _impl->LoadNetwork(network, deviceName, config_with_batch);
     return {exec._so, exec._ptr};
 }
 
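With the decision moved into the ICore, the public Core::LoadNetwork shrinks to key injection plus delegation. A condensed view of what the wrapper now contributes, written as a standalone helper (the function name is ours, not the diff's):

    #include <map>
    #include <string>

    // GPU is the only target opted in by default in this WIP commit; every
    // other caller passes the key explicitly, which is what lets MULTI/AUTO
    // reach the same batching path inside the ICore.
    std::map<std::string, std::string> withAutoBatching(const std::string& deviceName,
                                                        std::map<std::string, std::string> config) {
        if (deviceName.find("GPU") != std::string::npos)
            config["ALLOW_AUTO_BATCHING"] = "YES";
        return config;  // forwarded to the ICore's LoadNetwork
    }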
@@ -200,11 +200,15 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
         _workerRequests[p.deviceName];
         _inferPipelineTasksDeviceSpecific[p.deviceName] = NULL;
         const auto device = p.deviceName;
-        const auto deviceConfig = p.config;
+        auto deviceConfig = p.config;
+        if (device == "GPU") {
+            deviceConfig[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(YES);
+        }
         // will not wait for loading accelerator network,
         // so some parameters need to be transferred by value.
         _executor->run([&, modelPath, network, device, deviceConfig]() {
-            SoExecutableNetworkInternal executableNetwork;
+            std::cout << "DEVICE in AUTO:" << device << std::endl;
+            SoExecutableNetworkInternal executableNetwork;
             if (!modelPath.empty()) {
                 executableNetwork = _core->LoadNetwork(modelPath, device, deviceConfig);
             } else {
@@ -212,6 +216,7 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
             }
 
             GenerateWorkers(device, executableNetwork);
+            std::cout << "DEVICE in AUTO:" << device << " ENDED" << std::endl;
 
             if (device.find("CPU") == std::string::npos) {
                 _alreadyActualNetwork = true;
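The MULTI/AUTO side applies the same convention per device, and the hunk's comment about transferring parameters by value matters because the load runs on an executor thread that can outlive the loop iteration that spawned it. A sketch of that capture pattern with a plain std::thread standing in for the executor (names simplified, not the plugin's actual members):

    #include <iostream>
    #include <map>
    #include <string>
    #include <thread>

    void loadAsync(std::string device, std::map<std::string, std::string> deviceConfig) {
        if (device == "GPU")
            deviceConfig["ALLOW_AUTO_BATCHING"] = "YES";
        // Capture by value: the worker may run after the caller's locals are gone.
        std::thread worker([device, deviceConfig]() {
            std::cout << "loading on " << device << " with "
                      << deviceConfig.size() << " config entries" << std::endl;
        });
        worker.join();
    }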
@@ -55,6 +55,7 @@ namespace {
 }
 std::vector<std::string> supported_configKeys = []() -> decltype(PerfHintsConfig::SupportedKeys()) {
     auto res = PerfHintsConfig::SupportedKeys();
     res.push_back(PluginConfigParams::KEY_PERF_COUNT);
     res.push_back(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
     res.push_back(CONFIG_KEY_INTERNAL(MULTI_WORK_MODE_AS_AUTO));
     return res;
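Since the plugin assembles its supported keys on top of PerfHintsConfig::SupportedKeys(), an application can enumerate whatever ends up registered; the hunk header promises one added line that the truncated page does not show. A query sketch assuming the standard SUPPORTED_CONFIG_KEYS metric that IE plugins conventionally expose:

    #include <ie_core.hpp>
    #include <ie_plugin_config.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        InferenceEngine::Core core;
        auto keys = core.GetMetric("MULTI", METRIC_KEY(SUPPORTED_CONFIG_KEYS))
                        .as<std::vector<std::string>>();
        for (const auto& k : keys)
            std::cout << k << std::endl;  // prints PERF_COUNT, device priorities, etc.
        return 0;
    }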