avoiding layouts (#10560)

Maxim Shevtsov 2022-02-22 12:15:19 +03:00 committed by GitHub
parent 100fff83bf
commit 5247fdfcaf
2 changed files with 122 additions and 24 deletions

View File

@@ -30,19 +30,19 @@ using namespace InferenceEngine;
 std::vector<std::string> supported_configKeys = {CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG), CONFIG_KEY(AUTO_BATCH_TIMEOUT)};
 template <Precision::ePrecision precision>
-Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob, size_t batch_id, size_t batch_num) {
+Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob,
+                                                    std::string name,
+                                                    const std::set<std::string>& batched_names,
+                                                    size_t batch_id,
+                                                    size_t batch_num) {
     typedef typename PrecisionTrait<precision>::value_type TYPE;
     typedef typename std::add_pointer<TYPE>::type TYPEPTR;
     auto ptr = batched_blob->buffer().as<TYPEPTR>();
     auto sizePerBatch = batched_blob->size() / batch_num;
-    auto layout = batched_blob->getTensorDesc().getLayout();
     SizeVector dims = batched_blob->getTensorDesc().getDims();
     // for performance reason (copy avoidance) current impl of the auto-batching supports only batching by 0th dim
-    if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW ||
-        layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC ||
-        layout == InferenceEngine::Layout::NDHWC) {
+    if (batched_names.count(name)) {
         dims[0] = 1;
+        assert(batched_blob->getTensorDesc().getPrecision() == precision);
         return make_shared_blob<TYPE>({precision, dims, batched_blob->getTensorDesc().getLayout()},
                                       ptr + sizePerBatch * batch_id,
                                       sizePerBatch);
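The reworked helper above no longer infers "batchable" from the blob layout (NC/NCHW/NHWC/...); it consults a set of tensor names computed once at load time, and still carves each request's view out of the batched allocation by a fixed offset along the 0th dimension. As a rough illustration of that arithmetic only (not the plugin code; `BatchedBuffer`, `view_for_request` and the tensor name are made up, and plain `float` stands in for the templated precision):

```cpp
#include <cassert>
#include <cstddef>
#include <set>
#include <string>
#include <utility>
#include <vector>

// Stand-in for a blob batched along the 0th dim: batch_num equally sized chunks, contiguous.
struct BatchedBuffer {
    std::vector<float> data;
    size_t batch_num;
};

// Returns a non-owning view (pointer + element count) of the batch_id-th chunk when the tensor
// name is in the batched set (the name-based check that replaced the layout check); otherwise
// the whole buffer is shared unchanged.
std::pair<float*, size_t> view_for_request(BatchedBuffer& buf,
                                           const std::string& name,
                                           const std::set<std::string>& batched_names,
                                           size_t batch_id) {
    const size_t size_per_batch = buf.data.size() / buf.batch_num;
    if (batched_names.count(name)) {
        assert(batch_id < buf.batch_num);
        return {buf.data.data() + size_per_batch * batch_id, size_per_batch};
    }
    return {buf.data.data(), buf.data.size()};
}

int main() {
    BatchedBuffer buf{std::vector<float>(4 * 6), /*batch_num=*/4};
    auto view = view_for_request(buf, "input0", {"input0"}, /*batch_id=*/2);
    return view.second == 6 ? 0 : 1;  // view.first points at element 12 (= 2 * 6)
}
```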
@@ -57,27 +57,32 @@ AutoBatchInferRequest::AutoBatchInferRequest(const std::vector<std::shared_ptr<c
                                              const std::vector<std::shared_ptr<const ov::Node>>& outputs,
                                              AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
                                              int batch_id,
-                                             int num_batch)
+                                             int num_batch,
+                                             const std::set<std::string>& batchedInputs,
+                                             const std::set<std::string>& batchedOutputs)
     : IInferRequestInternal(inputs, outputs),
       _myBatchedRequestWrapper(workerRequest),
       _batchId(batch_id),
       _batchSize(num_batch) {
-    ShareBlobsWithBatchRequest();
+    ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs);
 }
 
 AutoBatchInferRequest::AutoBatchInferRequest(const InputsDataMap& networkInputs,
                                              const OutputsDataMap& networkOutputs,
                                              AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest,
                                              int batch_id,
-                                             int num_batch)
+                                             int num_batch,
+                                             const std::set<std::string>& batchedInputs,
+                                             const std::set<std::string>& batchedOutputs)
     : IInferRequestInternal(networkInputs, networkOutputs),
       _myBatchedRequestWrapper(workerRequest),
       _batchId(batch_id),
       _batchSize(num_batch) {
-    ShareBlobsWithBatchRequest();
+    ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs);
 }
 
-void AutoBatchInferRequest::ShareBlobsWithBatchRequest() {
+void AutoBatchInferRequest::ShareBlobsWithBatchRequest(const std::set<std::string>& batchedInputs,
+                                                       const std::set<std::string>& batchedOutputs) {
     // Allocate all input blobs
     for (const auto& it : _networkInputs) {
         auto blob = _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first);
@@ -86,78 +91,104 @@ void AutoBatchInferRequest::ShareBlobsWithBatchRequest() {
         case InferenceEngine::Precision::FP32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I8:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::FP64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::FP16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::BF16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BF16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U8:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::BOOL:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BOOL>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedInputs,
                 _batchId,
                 _batchSize);
             break;
@@ -174,78 +205,104 @@ void AutoBatchInferRequest::ShareBlobsWithBatchRequest() {
         case InferenceEngine::Precision::FP32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I8:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I8>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U32:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U32>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::FP64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::FP16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::FP16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::BF16:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BF16>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::I64:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::I64>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::U8:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::U8>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
         case InferenceEngine::Precision::BOOL:
             res = create_shared_blob_on_top_of_batched_blob<InferenceEngine::Precision::BOOL>(
                 _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first),
+                it.first,
+                batchedOutputs,
                 _batchId,
                 _batchSize);
             break;
@@ -371,12 +428,16 @@ AutoBatchExecutableNetwork::AutoBatchExecutableNetwork(
     const InferenceEngine::SoExecutableNetworkInternal& networkWithBatch,
     const InferenceEngine::SoExecutableNetworkInternal& networkWithoutBatch,
     const DeviceInformation& networkDevice,
-    const std::unordered_map<std::string, InferenceEngine::Parameter>& config)
+    const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
+    const std::set<std::string>& batchedInputs,
+    const std::set<std::string>& batchedOutputs)
     : InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr,
                                                           std::make_shared<InferenceEngine::ImmediateExecutor>()),
       _network{networkWithBatch},
       _networkWithoutBatch{networkWithoutBatch},
-      _config{config} {
+      _config{config},
+      _batchedInputs(batchedInputs),
+      _batchedOutputs(batchedOutputs) {
     // WA for gcc 4.8 ( fails compilation with member init-list)
     _device = networkDevice;
     auto time_out = config.find(CONFIG_KEY(AUTO_BATCH_TIMEOUT));
@@ -411,7 +472,9 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
                                                    networkOutputs,
                                                    workerRequestPtrAndId.first,
                                                    workerRequestPtrAndId.second,
-                                                   _device.batchForDevice);
+                                                   _device.batchForDevice,
+                                                   _batchedInputs,
+                                                   _batchedOutputs);
 }
 
 InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl(
@@ -427,7 +490,9 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn
                                                    outputs,
                                                    workerRequestPtrAndId.first,
                                                    workerRequestPtrAndId.second,
-                                                   _device.batchForDevice);
+                                                   _device.batchForDevice,
+                                                   _batchedInputs,
+                                                   _batchedOutputs);
 }
 
 std::pair<AutoBatchExecutableNetwork::WorkerInferRequest&, int> AutoBatchExecutableNetwork::GetWorkerInferRequest() {
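Taken together, the constructor and the two CreateInferRequestImpl hunks are plumbing: the batched tensor names discovered during LoadNetwork are stored once on the executable network and forwarded to every infer request, which only needs them while wiring up its blobs. A hypothetical, stripped-down version of that flow (all class, member, and tensor names here are invented, not the OpenVINO API) could look like:

```cpp
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <utility>

class MiniInferRequest {
public:
    MiniInferRequest(int batch_id, const std::set<std::string>& batched_inputs)
        : _batch_id(batch_id),
          // analogous to ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs): the names
          // are consulted during construction to choose shared-slice vs. whole-blob sharing
          _shares_slice(batched_inputs.count("input0") != 0) {}

    int batch_id() const { return _batch_id; }
    bool shares_slice() const { return _shares_slice; }

private:
    int _batch_id;
    bool _shares_slice;
};

class MiniExecNetwork {
public:
    explicit MiniExecNetwork(std::set<std::string> batched_inputs)
        : _batched_inputs(std::move(batched_inputs)) {}  // filled by the plugin at load time

    std::unique_ptr<MiniInferRequest> create_request(int batch_id) const {
        return std::make_unique<MiniInferRequest>(batch_id, _batched_inputs);
    }

private:
    const std::set<std::string> _batched_inputs;
};

int main() {
    MiniExecNetwork net({"input0"});
    std::cout << net.create_request(2)->shares_slice() << "\n";  // prints 1
}
```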
@@ -761,6 +826,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
     deviceConfigNoAutoBatch[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(NO);
 
     std::set<std::string> batched_inputs;
+    std::set<std::string> batched_outputs;
     // check that the auto-batching is applicable in general
     try {
         // if applicable, the Auto-Batching is implicitly enabled via the performance hints
@@ -768,7 +834,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
         const bool bTputInPlg = core->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT)).as<std::string>() == tput;
         const auto& mode = deviceConfig.find(CONFIG_KEY(PERFORMANCE_HINT));
         const bool bTputInLoadCfg = (mode != deviceConfig.end() && mode->second == tput);
-        // if the auto-batching is enabled implicitly, we shall check the dims carefully, to avoid outstanding failures
+        // if the auto-batching is enabled implicitly, check the dims carefully, to avoid outstanding failures
         const bool check_dims = (bTputInPlg || bTputInLoadCfg);
         CNNNetwork clonedNetwork(InferenceEngine::details::cloneNetwork(network));
         auto function = clonedNetwork.getFunction();
@@ -778,7 +844,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
         m.register_pass<ov::pass::FindBatch>(true, check_dims);
         m.run_passes(function);
         // do not reshape/re-batch originally batched networks and when there are no inputs with the N* layouts
-        // input(s) should have the batch dim as the first dim or none (current limitation of the auto-batching impl)
+        // input(s) should have the batch dim as the first dim (current limitation of the auto-batching impl)
         const auto& params = function->get_parameters();
         for (size_t input_id = 0; input_id < params.size(); input_id++) {
             const auto& input = params[input_id];
@@ -801,8 +867,28 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
                         << "Auto-batching operates only networks with inputs/outputs batched by 0th dimension";
             }
         }
-        if (!batched_inputs.size())
-            IE_THROW(NotImplemented) << "Auto-batching supports only networks with inputs featuring batched dim!";
+        const auto& results = function->get_results();
+        for (size_t output_id = 0; output_id < results.size(); output_id++) {
+            const auto& output = results[output_id];
+            const auto& shape = output->get_output_partial_shape(0);
+            // check the batch dim: either 0th (and the original batch size of 1) or none
+            if (shape.size() && ov::DimensionTracker::get_label(shape[0])) {
+                if (shape[0] != 1)
+                    IE_THROW(NotImplemented) << "Auto-batching does not reshape/re-batch originally batched networks!";
+                const auto& node = output->input_value(0);
+                batched_outputs.insert(ngraph::op::util::get_ie_output_name(
+                    ov::Output<const ov::Node>(node.get_node(), node.get_index())));
+            } else {
+                // if the 0-th dim is not for the batch, then we support only the case when NONE dimension is batch
+                for (size_t s = 1; s < shape.size(); s++)
+                    if (ov::DimensionTracker::get_label(shape[s]))
+                        IE_THROW(NotImplemented)
+                            << "Auto-batching operates only networks with outputs batched by 0th dimension";
+            }
+        }
+        if (!batched_inputs.size() || !batched_outputs.size())
+            IE_THROW(NotImplemented)
+                << "Auto-batching supports only networks with inputs/outputs featuring batched dim!";
     } catch (...) {
         metaDevice.batchForDevice = 1;
     }
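The new loop over `function->get_results()` mirrors the existing input check: an output qualifies only if the dimension tracker labels its 0th dimension as the batch and the original batch size is 1; a batch label on any other dimension, or an originally batched network, gets rejected. A simplified stand-in for that decision, using plain per-dimension labels instead of `ov::DimensionTracker` (the `TrackedOutput` type and its field names are made up):

```cpp
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

// One output as seen after the FindBatch pass: a static shape plus a per-dimension label,
// where a non-zero label marks the dimension the pass considers to be the batch.
struct TrackedOutput {
    std::string name;
    std::vector<size_t> shape;
    std::vector<size_t> labels;  // same length as shape; 0 == not tracked as batch
};

// true  -> the output would be added to batched_outputs (batched by the 0th dim, batch == 1)
// false -> no batch dimension at all, the output is left untouched
// throws -> the network is rejected for auto-batching, as in the hunk above
bool is_batchable_by_0th_dim(const TrackedOutput& out) {
    if (!out.shape.empty() && out.labels[0] != 0) {
        if (out.shape[0] != 1)
            throw std::runtime_error("originally batched network: " + out.name);
        return true;
    }
    for (size_t s = 1; s < out.shape.size(); s++)
        if (out.labels[s] != 0)
            throw std::runtime_error("output batched by a non-0th dim: " + out.name);
    return false;
}
```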
@@ -878,7 +964,9 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN
     return std::make_shared<AutoBatchExecutableNetwork>(executableNetworkWithBatch,
                                                         executableNetworkWithoutBatch,
                                                         metaDevice,
-                                                        networkConfig);
+                                                        networkConfig,
+                                                        batched_inputs,
+                                                        batched_outputs);
 }
 
 InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl(

View File

@@ -49,7 +49,9 @@ public:
                                const InferenceEngine::SoExecutableNetworkInternal& networkForDevice,
                                const InferenceEngine::SoExecutableNetworkInternal& networkForDeviceWithoutBatch,
                                const DeviceInformation& networkDevices,
-                               const std::unordered_map<std::string, InferenceEngine::Parameter>& config);
+                               const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
+                               const std::set<std::string>& batchedIntputs,
+                               const std::set<std::string>& batchedOutputs);
 
     void SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) override;
     InferenceEngine::Parameter GetConfig(const std::string& name) const override;
@@ -80,6 +82,9 @@ protected:
     bool _needPerfCounters = false;
     std::atomic_size_t _numRequestsCreated = {0};
    std::atomic_int _timeOut = {0}; // in ms
+    const std::set<std::string> _batchedInputs;
+    const std::set<std::string> _batchedOutputs;
 };
 
 class AutoBatchInferRequest : public InferenceEngine::IInferRequestInternal {
@@ -89,12 +94,16 @@ public:
                                    const InferenceEngine::OutputsDataMap& networkOutputs,
                                    AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr,
                                    int batch_id,
-                                   int num_batch);
+                                   int num_batch,
+                                   const std::set<std::string>& batchedIntputs,
+                                   const std::set<std::string>& batchedOutputs);
     explicit AutoBatchInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
                                    const std::vector<std::shared_ptr<const ov::Node>>& outputs,
                                    AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr,
                                    int batch_id,
-                                   int num_batch);
+                                   int num_batch,
+                                   const std::set<std::string>& batchedIntputs,
+                                   const std::set<std::string>& batchedOutputs);
 
     // Batch-Device impl specific: sets the data (blobs from the device request to the batched device request)
     void SetBlobsToAnotherRequest(InferenceEngine::SoIInferRequestInternal& req);
@@ -110,7 +119,8 @@ public:
 
 protected:
     void CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src, InferenceEngine::Blob::Ptr dst, bool bInput);
-    void ShareBlobsWithBatchRequest();
+    void ShareBlobsWithBatchRequest(const std::set<std::string>& batchedIntputs,
+                                    const std::set<std::string>& batchedOutputs);
     size_t _batchId;
     size_t _batchSize;
 };