Auto batch smart reshape (relies on the dim tracking) (#9964)

Maxim Shevtsov 2022-02-10 20:43:06 +03:00 committed by GitHub
parent 510e5fb746
commit e41e1f51a0
2 changed files with 93 additions and 96 deletions
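
In short, this change replaces the layout-based batch heuristic (checking for NC/NCHW/NCDHW/NHWC/NDHWC inputs) with ngraph dimension tracking: a FindBatch pass labels the batch dimension on a cloned copy of the function, and LoadNetworkImpl collects the inputs whose 0th dimension carries that label into batched_inputs, which later drives the reshape to the chosen batch size. A condensed sketch of that detection step follows; it is simplified from the diff below (the implicit THROUGHPUT-hint check, the dynamic-shape guard, and error reporting are omitted) and is not a drop-in excerpt from the commit.

// Condensed sketch, simplified from the new LoadNetworkImpl logic below (not verbatim from the commit)
#include <memory>
#include <set>
#include <string>
#include "dimension_tracker.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/common_optimizations/dimension_tracking.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/utils/utils.hpp"

static std::set<std::string> collect_batched_inputs(const std::shared_ptr<ngraph::Function>& function) {
    std::set<std::string> batched_inputs;
    // run dimension tracking so the batch dimension of each Parameter gets a label
    ov::pass::Manager m;
    m.register_pass<ngraph::pass::InitNodeInfo>();
    m.register_pass<ov::pass::FindBatch>();
    m.run_passes(function);
    for (const auto& param : function->get_parameters()) {
        const auto& shape = param->get_partial_shape();
        // current limitation: only a labelled 0th dimension is treated as the batch dim
        if (shape.size() && ov::DimensionTracker::get_label(shape[0]))
            batched_inputs.insert(ngraph::op::util::get_ie_output_name(param->output(0)));
    }
    return batched_inputs;
}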


@@ -5,21 +5,24 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "auto_batch.hpp"
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include <ie_icore.hpp>
#include <ie_ngraph_utils.hpp>
#include <ie_performance_hints.hpp>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <transformations/utils/utils.hpp>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "dimension_tracker.hpp"
#include "ie_icore.hpp"
#include "ie_ngraph_utils.hpp"
#include "ie_performance_hints.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/runtime/intel_gpu/properties.hpp"
#include "transformations/common_optimizations/dimension_tracking.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/utils/utils.hpp"
namespace AutoBatchPlugin {
using namespace InferenceEngine;
@@ -34,8 +37,7 @@ Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob, size
auto sizePerBatch = batched_blob->size() / batch_num;
auto layout = batched_blob->getTensorDesc().getLayout();
SizeVector dims = batched_blob->getTensorDesc().getDims();
// the below code is a placeholder for the WIP (22.1) functionality
// that will check the reshaping by the batch is robust (CVS-51744)
// for performance reason (copy avoidance) current impl of the auto-batching supports only batching by 0th dim
if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW ||
layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
layout == InferenceEngine::Layout::NDHWC) {
@@ -393,7 +395,7 @@ unsigned int AutoBatchExecutableNetwork::ParseTimeoutValue(const std::string& s)
}
std::shared_ptr<InferenceEngine::RemoteContext> AutoBatchExecutableNetwork::GetContext() const {
return _network->GetContext();
return _networkWithoutBatch->GetContext();
}
InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl(
@@ -504,12 +506,13 @@ std::pair<AutoBatchExecutableNetwork::WorkerInferRequest&, int> AutoBatchExecuta
}
InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequest() {
IInferRequestInternal::Ptr syncRequestImpl;
if (this->_plugin) {
const auto& core = _plugin->GetCore();
if (core && core->isNewAPI())
syncRequestImpl = CreateInferRequestImpl(_parameters, _results);
if (!_network) {
auto res = _networkWithoutBatch->CreateInferRequest();
res->setPointerToExecutableNetworkInternal(shared_from_this());
return res;
}
// trying to create the new API request first
IInferRequestInternal::Ptr syncRequestImpl = CreateInferRequestImpl(_parameters, _results);
if (!syncRequestImpl)
syncRequestImpl = CreateInferRequestImpl(_networkInputs, _networkOutputs);
syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this());
@@ -523,7 +526,8 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
}
std::shared_ptr<ngraph::Function> AutoBatchExecutableNetwork::GetExecGraphInfo() {
return _network->GetExecGraphInfo() ? _network->GetExecGraphInfo() : _networkWithoutBatch->GetExecGraphInfo();
return _network && _network->GetExecGraphInfo() ? _network->GetExecGraphInfo()
: _networkWithoutBatch->GetExecGraphInfo();
}
void AutoBatchExecutableNetwork::SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) {
@@ -542,10 +546,10 @@ InferenceEngine::Parameter AutoBatchExecutableNetwork::GetConfig(const std::stri
return it->second;
} else {
// find config key among networks config keys
auto param = _network->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
auto param = _networkWithoutBatch->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
for (auto&& configKey : param.as<std::vector<std::string>>()) {
if (configKey == name) {
return _network->GetConfig(configKey);
return _networkWithoutBatch->GetConfig(configKey);
}
}
IE_THROW(NotFound) << name << " not found in the ExecutableNetwork config";
@@ -556,18 +560,18 @@ InferenceEngine::Parameter AutoBatchExecutableNetwork::GetMetric(const std::stri
if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
auto reqs = 0;
try {
auto hint = _network->GetConfig(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as<std::string>();
auto hint = _networkWithoutBatch->GetConfig(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as<std::string>();
reqs = InferenceEngine::PerfHintsConfig::CheckPerformanceHintRequestValue(hint);
if (!reqs) // no limitations from user, let's deduce the full blown #requests
// (multiplied by the devices capabilities to run multiple <batched> requests for further perf)
reqs = _device.batchForDevice *
_network->GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
_networkWithoutBatch->GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
} catch (const InferenceEngine::Exception& iie) {
}
reqs = std::max(reqs, _device.batchForDevice); // round up to the possible user's value
IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, reqs);
} else if (name == METRIC_KEY(NETWORK_NAME)) {
IE_SET_METRIC_RETURN(NETWORK_NAME, _network->GetMetric(METRIC_KEY(NETWORK_NAME)).as<std::string>());
IE_SET_METRIC_RETURN(NETWORK_NAME, _networkWithoutBatch->GetMetric(METRIC_KEY(NETWORK_NAME)).as<std::string>());
} else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
IE_SET_METRIC_RETURN(SUPPORTED_METRICS,
{METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS),
@@ -649,12 +653,15 @@ RemoteContext::Ptr AutoBatchInferencePlugin::CreateContext(const InferenceEngine
auto cfg = config;
auto it = cfg.find(CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG));
if (it == cfg.end())
IE_THROW() << "Value for KEY_AUTO_BATCH is not set";
IE_THROW() << "Value for KEY_AUTO_BATCH_DEVICE_CONFIG is not set";
auto val = it->second.as<std::string>();
auto core = GetCore();
if (!core)
return nullptr;
auto metaDevice = ParseMetaDevice(val, std::map<std::string, std::string>());
cfg.erase(it);
return GetCore()->CreateContext(metaDevice.deviceName, cfg);
return core->CreateContext(metaDevice.deviceName, cfg);
}
Parameter AutoBatchInferencePlugin::GetConfig(const std::string& name,
@@ -735,64 +742,70 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
const InferenceEngine::CNNNetwork& network,
const std::shared_ptr<InferenceEngine::RemoteContext> ctx,
const std::map<std::string, std::string>& config) {
if (GetCore() == nullptr) {
auto core = GetCore();
if (core == nullptr) {
IE_THROW() << "Please, work with Auto-Batching device via InferencEngine::Core object";
}
auto fullConfig = mergeConfigs(_config, config);
auto device_batch = fullConfig.find(CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG));
if (device_batch == fullConfig.end()) {
IE_THROW() << "KEY_AUTO_BATCH key is not set for BATCH device";
}
auto metaDevice = ParseMetaDevice(device_batch->second, fullConfig);
const auto& deviceName = metaDevice.deviceName;
const auto& deviceConfig = metaDevice.config;
auto config_without_autobatch = config, deviceConfigNoAutoBatch = deviceConfig;
auto deviceConfigNoAutoBatch = deviceConfig;
// avoid recursive auto-batching
config_without_autobatch[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(NO);
deviceConfigNoAutoBatch[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(NO);
auto function = network.getFunction();
std::set<std::string> batched_inputs;
// check that the auto-batching is applicable in general
try {
// if applicable, the Auto-Batching is implicitly enabled via the performance hints
const auto tput = CONFIG_VALUE(THROUGHPUT);
const bool bTputInPlg = core->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT)).as<std::string>() == tput;
const auto& mode = deviceConfig.find(CONFIG_KEY(PERFORMANCE_HINT));
const bool bTputInLoadCfg = (mode != deviceConfig.end() && mode->second == tput);
// if the auto-batching is enabled implicitly, we shall check the dims carefully, to avoid outstanding failures
const bool check_dims = (bTputInPlg || bTputInLoadCfg);
CNNNetwork clonedNetwork(InferenceEngine::details::cloneNetwork(network));
auto function = clonedNetwork.getFunction();
// find the batch dim
ov::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();
if (check_dims)
m.register_pass<ov::pass::FindBatch>();
else
m.register_pass<ov::pass::FindBatchDontTrack>();
m.run_passes(function);
// do not reshape/re-batch originally batched networks and when there are no inputs with the N* layouts
// the below code is a placeholder for the WIP (22.1) functionality
// that will check the reshaping by the batch is robust (CVS-51744)
const InputsDataMap inputInfo = network.getInputsInfo();
bool atLeastOneInputIsBatched = false;
for (const InputsDataMap::value_type& item : inputInfo) {
auto layout = item.second->getTensorDesc().getLayout();
if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW ||
layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
layout == InferenceEngine::Layout::NDHWC) {
if (1 != item.second->getTensorDesc().getDims()[0]) // do not reshape/re-batch batched networks
// input(s) should have the batch dim as the first dim or none (current limitation of the auto-batching impl)
const auto& params = function->get_parameters();
for (size_t input_id = 0; input_id < params.size(); input_id++) {
const auto& input = params[input_id];
const auto& shape = input->get_partial_shape();
// currently no plugin support batched execution for dynamic networks
if (shape.is_dynamic())
IE_THROW(NotImplemented) << "Auto-batching does not support dynamic networks!";
// check the batch dim: either 0th (and the original batch size of 1) or none
if (shape.size() && ov::DimensionTracker::get_label(shape[0])) {
const auto& static_shape = input->get_shape();
if (static_shape[0] != 1)
IE_THROW(NotImplemented) << "Auto-batching does not reshape/re-batch originally batched networks!";
else
atLeastOneInputIsBatched = true;
batched_inputs.insert(
ngraph::op::util::get_ie_output_name(params[input_id]->output(0))); // batched dim for the input
} else {
// if the 0-th dim is not for the batch, then we support only the case when NONE dimension is batch
for (size_t s = 1; s < shape.size(); s++)
if (ov::DimensionTracker::get_label(shape[s]))
IE_THROW(NotImplemented)
<< "Auto-batching operates only networks with inputs/outputs batched by 0th dimension";
}
}
bool atLeastOneOutputIsBatched = false;
const OutputsDataMap outputInfo = network.getOutputsInfo();
for (const OutputsDataMap::value_type& item : outputInfo) {
auto layout = item.second->getTensorDesc().getLayout();
if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW ||
layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
layout == InferenceEngine::Layout::NDHWC) {
if (1 != item.second->getTensorDesc().getDims()[0]) // do not reshape/re-batch batched networks
IE_THROW(NotImplemented) << "Auto-batching does not reshape/re-batch originally batched networks!";
else
atLeastOneOutputIsBatched = true;
}
}
if (!atLeastOneInputIsBatched || !atLeastOneOutputIsBatched)
IE_THROW(NotImplemented)
<< "Auto-batching supports only networks featuring inputs/outputs with the batched layouts !";
if (!batched_inputs.size())
IE_THROW(NotImplemented) << "Auto-batching supports only networks with inputs featuring batched dim!";
} catch (...) {
// fallback to loading as if no Auto-Batching was involved
auto res = GetCore()->LoadNetwork(network, deviceName, deviceConfigNoAutoBatch);
_additionalSOPtrs.push_back(res._so);
return res._ptr;
metaDevice.batchForDevice = 1;
}
if (!metaDevice.batchForDevice) {
@@ -802,9 +815,8 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
// let's query the optimal batch size
std::map<std::string, InferenceEngine::Parameter> options;
options["MODEL_PTR"] = std::const_pointer_cast<ngraph::Function>(network.getFunction());
auto optBatchSize =
GetCore()->GetMetric(deviceName, METRIC_KEY(OPTIMAL_BATCH_SIZE), options).as<unsigned int>();
auto res = GetCore()->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as<std::string>();
auto optBatchSize = core->GetMetric(deviceName, METRIC_KEY(OPTIMAL_BATCH_SIZE), options).as<unsigned int>();
auto res = core->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS)).as<std::string>();
requests = PerfHintsConfig::CheckPerformanceHintRequestValue(res);
const auto& reqs = config.find(CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS));
if (reqs != config.end())
@@ -817,8 +829,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
const auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT);
const auto perfConfigInTargetPlugin =
GetCore()->GetConfig(deviceName, PluginConfigParams::KEY_PERF_COUNT).as<std::string>() ==
PluginConfigParams::YES;
core->GetConfig(deviceName, PluginConfigParams::KEY_PERF_COUNT).as<std::string>() == PluginConfigParams::YES;
const bool enablePerfCounters = perfConfigInTargetPlugin || ((fullConfig.end() != perfConfig) &&
(perfConfig->second == PluginConfigParams::YES));
auto report_footprint = [](std::shared_ptr<ICore> pCore, std::string device) -> size_t {
@@ -833,11 +844,11 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
size_t batch1_footprint = 0;
if (deviceName.find("GPU") != std::string::npos)
batch1_footprint = report_footprint(GetCore(), deviceName);
auto executableNetworkWithoutBatch = ctx ? GetCore()->LoadNetwork(network, ctx, deviceConfigNoAutoBatch)
: GetCore()->LoadNetwork(network, deviceName, deviceConfigNoAutoBatch);
batch1_footprint = report_footprint(core, deviceName);
auto executableNetworkWithoutBatch = ctx ? core->LoadNetwork(network, ctx, deviceConfigNoAutoBatch)
: core->LoadNetwork(network, deviceName, deviceConfigNoAutoBatch);
if (deviceName.find("GPU") != std::string::npos) {
batch1_footprint = report_footprint(GetCore(), deviceName) - batch1_footprint;
batch1_footprint = report_footprint(core, deviceName) - batch1_footprint;
if (batch1_footprint) {
const auto total_mem =
GetCore()->GetMetric(deviceName, GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE)).as<uint64_t>();
@@ -855,36 +866,20 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
}
InferenceEngine::SoExecutableNetworkInternal executableNetworkWithBatch;
if (metaDevice.batchForDevice > 1) {
if (metaDevice.batchForDevice > 1 && batched_inputs.size()) {
try {
CNNNetwork clonedNetwork(InferenceEngine::details::cloneNetwork(network));
const InputsDataMap inputInfo = clonedNetwork.getInputsInfo();
ICNNNetwork::InputShapes shapes = clonedNetwork.getInputShapes();
for (const InputsDataMap::value_type& item : inputInfo) {
auto layout = item.second->getTensorDesc().getLayout();
// the below code is a placeholder for the WIP (22.1) functionality
// that will check the reshaping by the batch is robust (CVS-51744)
if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW ||
layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
layout == InferenceEngine::Layout::NDHWC) {
assert(1 == shapes[item.first][0]); // do not reshape/re-batch originally batched networks
shapes[item.first][0] = metaDevice.batchForDevice;
}
}
clonedNetwork.reshape(shapes);
executableNetworkWithBatch =
ctx ? GetCore()->LoadNetwork(CNNNetwork{clonedNetwork}, ctx, deviceConfigNoAutoBatch)
: GetCore()->LoadNetwork(CNNNetwork{clonedNetwork}, deviceName, deviceConfigNoAutoBatch);
CNNNetwork reshaped(InferenceEngine::details::cloneNetwork(network));
ICNNNetwork::InputShapes shapes = reshaped.getInputShapes();
for (const auto& input : batched_inputs)
shapes[input][0] = metaDevice.batchForDevice;
reshaped.reshape(shapes);
executableNetworkWithBatch = ctx ? core->LoadNetwork(reshaped, ctx, deviceConfigNoAutoBatch)
: core->LoadNetwork(reshaped, deviceName, deviceConfigNoAutoBatch);
} catch (...) {
executableNetworkWithBatch = {nullptr, nullptr};
metaDevice.batchForDevice = 1;
}
}
if (!executableNetworkWithBatch) {
executableNetworkWithBatch = executableNetworkWithoutBatch;
metaDevice.batchForDevice = 1;
}
return std::make_shared<AutoBatchExecutableNetwork>(executableNetworkWithBatch,
executableNetworkWithoutBatch,
metaDevice,
@@ -902,13 +897,16 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadE
InferenceEngine::QueryNetworkResult AutoBatchInferencePlugin::QueryNetwork(
const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) const {
auto core = GetCore();
if (!core)
return InferenceEngine::QueryNetworkResult();
auto cfg = config;
for (auto c : cfg) {
if (c.first == CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG)) {
auto val = c.second;
cfg.erase(c.first);
auto metaDevice = ParseMetaDevice(val, cfg);
return GetCore()->QueryNetwork(network, metaDevice.deviceName, cfg);
return core->QueryNetwork(network, metaDevice.deviceName, cfg);
}
}
IE_THROW() << "Value for KEY_AUTO_BATCH is not set";

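For context only (not part of the diff), here is a minimal usage sketch of how this load path is typically reached: either by targeting the BATCH device explicitly, or implicitly when the underlying plugin is configured with the THROUGHPUT performance hint, which is what the bTputInPlg/bTputInLoadCfg check above detects. The "BATCH:GPU(4)" device string, the model path, and the plain string config keys are illustrative assumptions, not taken from this commit.

// Illustrative usage sketch; device strings, model path and batch size are assumptions.
#include <map>
#include <string>
#include <ie_core.hpp>

int main() {
    InferenceEngine::Core ie;
    auto network = ie.ReadNetwork("model.xml");  // assumed model path
    // explicit: wrap the GPU plugin with auto-batching, requesting a batch of 4
    auto exec_explicit = ie.LoadNetwork(network, "BATCH:GPU(4)");
    // implicit: the THROUGHPUT hint lets the runtime insert the BATCH device automatically
    auto exec_implicit = ie.LoadNetwork(network, "GPU", {{"PERFORMANCE_HINT", "THROUGHPUT"}});
    return 0;
}

The remaining hunk below is from the plugin's header: the _additionalSOPtrs member, which was only needed by the fallback load path removed in this commit, is deleted.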

@ -173,7 +173,6 @@ protected:
const InferenceEngine::CNNNetwork& network,
const std::shared_ptr<InferenceEngine::RemoteContext> context,
const std::map<std::string, std::string>& config);
std::vector<std::shared_ptr<void>> _additionalSOPtrs;
};
} // namespace AutoBatchPlugin