diff --git a/src/inference/src/check_network_batchable.cpp b/src/inference/src/check_network_batchable.cpp
new file mode 100644
index 00000000000..8ce148dd152
--- /dev/null
+++ b/src/inference/src/check_network_batchable.cpp
@@ -0,0 +1,82 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "check_network_batchable.hpp"
+
+#include "dimension_tracker.hpp"
+#include "ie_ngraph_utils.hpp"
+#include "ngraph/opsets/opset.hpp"
+#include "openvino/op/detection_output.hpp"
+#include "openvino/op/ops.hpp"
+#include "openvino/pass/manager.hpp"
+#include "transformations/common_optimizations/dimension_tracking.hpp"
+#include "transformations/init_node_info.hpp"
+
+namespace InferenceEngine {
+namespace details {
+
+NetworkBatchAbility isNetworkBatchable(const CNNNetwork& orig_network,
+                                       const std::string& deviceNameWithoutBatch,
+                                       bool strictly_track_dims) {
+    // clone the network to run the batch-dim tracking passes without touching the original
+    CNNNetwork clonedNetwork(cloneNetwork(orig_network));
+    auto function = clonedNetwork.getFunction();
+    // find the batch dim
+    ov::pass::Manager m;
+    m.register_pass<ov::pass::InitNodeInfo>();
+    m.register_pass<ov::pass::FindBatch>(true, strictly_track_dims);
+    m.run_passes(function);
+    bool any_batched_inputs = false;
+    // do not reshape/re-batch originally batched networks and when there are no inputs with the N* layouts
+    // input(s) should have the batch dim as the first dim or none (current limitation of the auto-batching impl)
+    const auto& params = function->get_parameters();
+    for (size_t input_id = 0; input_id < params.size(); input_id++) {
+        const auto& input = params[input_id];
+        const auto& shape = input->get_partial_shape();
+        // currently no plugin support batched execution for dynamic networks
+        if (shape.is_dynamic())
+            return NetworkBatchAbility::NO;
+        // check the batch dim: either 0th (and the original batch size of 1) or none
+        if (shape.size() && ov::DimensionTracker::get_label(shape[0])) {
+            const auto& static_shape = input->get_shape();
+            if (static_shape[0] != 1)
+                return NetworkBatchAbility::NO;
+            else
+                any_batched_inputs = true;
+        } else {
+            // if the 0-th dim is not for the batch, then we support only the case when NONE dimension is batch
+            for (size_t s = 1; s < shape.size(); s++)
+                if (ov::DimensionTracker::get_label(shape[s]))
+                    return NetworkBatchAbility::NO;
+        }
+    }
+    if (!any_batched_inputs)
+        return NetworkBatchAbility::NO;
+
+    for (auto&& node : orig_network.getFunction()->get_ops())
+        node->get_rt_info()["affinity"] = "BATCH";  // default affinity (ignored if HETERO is not triggered)
+    // have to execute the DetectionOutput separately (without batching)
+    // as this layer does mix-in the values from the different inputs (batch id)
+    bool bDetectionOutput = false;
+    for (auto& result_node : orig_network.getFunction()->get_results()) {
+        auto do_node = result_node->input_value(0).get_node_shared_ptr();
+        std::shared_ptr<ov::Node> convert_node;
+        if (ov::is_type<ov::op::v0::Convert>(do_node)) {  // cases with do->convert->result
+            convert_node = do_node;
+            do_node = convert_node->get_input_node_shared_ptr(0);
+        }
+        // the code below doesn't need to separate the versions (opsets) of the DetectionOutput
+        // so base class check is enough
+        auto detectionOutputBase = std::dynamic_pointer_cast<ov::op::util::DetectionOutputBase>(do_node);
+        if (detectionOutputBase) {
+            result_node->get_rt_info()["affinity"] = deviceNameWithoutBatch;
+            do_node->get_rt_info()["affinity"] = deviceNameWithoutBatch;
+            if (convert_node)
+                convert_node->get_rt_info()["affinity"] = deviceNameWithoutBatch;
+            bDetectionOutput = true;
+        }
+    }
+    return bDetectionOutput ? NetworkBatchAbility::WITH_HETERO : NetworkBatchAbility::AS_IS;
+}
+
+}  // namespace details
+}  // namespace InferenceEngine
diff --git a/src/inference/src/check_network_batchable.hpp b/src/inference/src/check_network_batchable.hpp
new file mode 100644
index 00000000000..ed0efdff4d3
--- /dev/null
+++ b/src/inference/src/check_network_batchable.hpp
@@ -0,0 +1,26 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "cnn_network_ngraph_impl.hpp"
+
+namespace InferenceEngine {
+namespace details {
+/**
+ * @brief Checks if the input network is batch-able (e.g. no dynamic inputs, inputs has the batch dimension, etc)
+ * @param network The network to check for automatic-batching applicability
+ * @param deviceNoBatch Target device name without the batching prefix (used for the HETERO affinities)
+ * @param strictly_track_dims Whether the batch dimension must be tracked strictly (implicit auto-batching case)
+ * @return An enum value indicating whether the network can be safely batched (with HETERO or as is) or not
+ */
+enum NetworkBatchAbility : uint32_t { NO = 0, AS_IS, WITH_HETERO };
+NetworkBatchAbility isNetworkBatchable(const CNNNetwork& network,
+                                       const std::string& deviceNoBatch,
+                                       bool strictly_track_dims);
+
+}  // namespace details
+}  // namespace InferenceEngine
diff --git a/src/inference/src/ie_core.cpp b/src/inference/src/ie_core.cpp
index fac529fc855..b62a7b176d5 100644
--- a/src/inference/src/ie_core.cpp
+++ b/src/inference/src/ie_core.cpp
@@ -13,6 +13,7 @@
 #include <vector>
 
 #include "any_copy.hpp"
+#include "check_network_batchable.hpp"
 #include "cnn_network_ngraph_impl.hpp"
 #include "compilation_context.hpp"
 #include "cpp/ie_cnn_network.h"
@@ -557,6 +558,8 @@ public:
                            std::string& deviceName,
                            std::map<std::string, std::string>& config) {
         std::string deviceNameWithBatchSize, deviceNameWithoutBatch;
+        // fully strict dims tracking by default (Auto-Batching is enabled implicitly)
+        bool strictly_check_dims = true;
         if (deviceName.find("BATCH") != std::string::npos) {
             // explicitly enabled Auto-Batching
             auto pos = deviceName.find_first_of(":");
@@ -564,6 +567,9 @@ public:
                 return;  // BATCH device is already configured via the config
             deviceNameWithBatchSize = deviceName.substr(pos + 1);
             deviceNameWithoutBatch = DeviceIDParser::getBatchDevice(deviceNameWithBatchSize);
+            // when user sets the BATCH device explicitly, we may check the dims less strictly
+            // as the result is being checked by the user
+            strictly_check_dims = false;
         } else {
             // check whether the Auto-Batching is disabled explicitly
             const auto& batch_mode = config.find(ov::hint::allow_auto_batching.name());
@@ -594,38 +600,18 @@ public:
             if (bExclReqsEnabled || (!bTputInPlg && !bTputInLoadCfg))
                 return;
         }
-        auto function = network.getFunction();
-        // have to execute the DetectionOutput separately (without batching)
-        // as this layer mix-in the values from the different inputs (batch id)
-        bool bDetectionOutput = false;
-        const std::string detectionOutputOpName = ngraph::op::DetectionOutput::get_type_info_static().name;
-        const std::string resultOpName = ngraph::op::Result::get_type_info_static().name;
-        for (auto&& node : function->get_ops()) {
-            auto isDetectionOutputParent = [&detectionOutputOpName](decltype(node)& nd) {
-                for (size_t n = 0; n < nd->get_input_size(); n++) {
-                    // the code below doesn't need to separate the versions (opsets) of the DetectionOutput
-                    // so type_info name check is enough
-                    // (if in a future there will be a new ver that doesn't mix the batch, this will be new op)
-                    if (detectionOutputOpName == nd->get_input_node_ptr(n)->get_type_info().name)
-                        return true;
-                }
-                return false;
-            };
-
-            if ((detectionOutputOpName == node->get_type_info().name) ||
-                ((resultOpName == node->get_type_info().name) && isDetectionOutputParent(node))) {
-                node->get_rt_info()["affinity"] = deviceNameWithoutBatch;
-                bDetectionOutput = true;
-            } else {
-                node->get_rt_info()["affinity"] = "BATCH";
-            }
-        }
         auto batchConfig = deviceNameWithBatchSize.empty() ? deviceNameWithoutBatch : deviceNameWithBatchSize;
-        if (bDetectionOutput) {
+        auto res = InferenceEngine::details::isNetworkBatchable(network, deviceNameWithoutBatch, strictly_check_dims);
+        switch (res) {
+        case InferenceEngine::details::NetworkBatchAbility::NO:
+            return;
+        case InferenceEngine::details::NetworkBatchAbility::AS_IS:
+            deviceName = "BATCH:" + batchConfig;
+            break;
+        case InferenceEngine::details::NetworkBatchAbility::WITH_HETERO:
             deviceName = "HETERO:BATCH," + deviceNameWithoutBatch;
             config[CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG)] = batchConfig;
-        } else {
-            deviceName = "BATCH:" + batchConfig;
+            break;
         }
     }
diff --git a/src/plugins/auto_batch/auto_batch.cpp b/src/plugins/auto_batch/auto_batch.cpp
index 53c3ef7fca4..0c7d15514d8 100644
--- a/src/plugins/auto_batch/auto_batch.cpp
+++ b/src/plugins/auto_batch/auto_batch.cpp
@@ -841,7 +841,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
     // find the batch dim
     ov::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
-    m.register_pass<ov::pass::FindBatch>(true, check_dims);
+    m.register_pass<ov::pass::FindBatch>(false, check_dims);
     m.run_passes(function);
     // do not reshape/re-batch originally batched networks and when there are no inputs with the N* layouts
     // input(s) should have the batch dim as the first dim (current limitation of the auto-batching impl)
@@ -871,6 +871,8 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
     for (size_t output_id = 0; output_id < results.size(); output_id++) {
         const auto& output = results[output_id];
         const auto& shape = output->get_output_partial_shape(0);
+        if (shape.is_dynamic())
+            IE_THROW(NotImplemented) << "Auto-batching does not support dynamic networks!";
         // check the batch dim: either 0th (and the original batch size of 1) or none
         if (shape.size() && ov::DimensionTracker::get_label(shape[0])) {
             if (shape[0] != 1)