allocate host blob from hw context (#9854)

* allocate host blob from hw context

Signed-off-by: fishbell <bell.song@intel.com>

* accommodate new namespace

Signed-off-by: fishbell <bell.song@intel.com>

* fix build

Signed-off-by: fishbell <bell.song@intel.com>

* refine logic

Signed-off-by: fishbell <bell.song@intel.com>

* dynamic shape needs to share blobs with the CPU infer request

Signed-off-by: fishbell <bell.song@intel.com>

* reuse icore from core

Signed-off-by: fishbell <bell.song@intel.com>

* CPU, GPU cases fall back to original code to fix CI failure

Signed-off-by: fishbell <bell.song@intel.com>
song, bell 2022-01-27 13:52:25 +08:00 committed by GitHub
parent aa518282b0
commit 49c2086d23
6 changed files with 70 additions and 29 deletions
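
The essence of the change, condensed: when the target device exposes a default remote context, the user-facing input/output blobs are allocated from that context instead of plain host memory. Below is a minimal sketch of that allocation branch using the same calls as the diff; the helper name and includes are assumptions, not part of the commit:

#include <ie_remote_context.hpp>
#include <blob_factory.hpp>  // make_blob_with_precision, the pre-existing allocation helper

// Hypothetical helper illustrating the new allocation path.
InferenceEngine::Blob::Ptr AllocateBlob(const InferenceEngine::TensorDesc& desc,
                                        const InferenceEngine::RemoteContext::Ptr& ctx) {
    InferenceEngine::Blob::Ptr blob;
    if (ctx) {
        blob = ctx->CreateHostBlob(desc);       // host memory allocated via the hw context
    } else {
        blob = make_blob_with_precision(desc);  // original plain host allocation
    }
    blob->allocate();
    return blob;
}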

View File

@@ -180,6 +180,13 @@ public:
virtual bool isNewAPI() const = 0;
/**
* @brief Get a pointer to default shared context object for the specified device.
* @param deviceName - A name of a device to get the default shared context from.
* @return A shared pointer to a default remote context.
*/
virtual RemoteContext::Ptr GetDefaultContext(const std::string& deviceName) = 0;
/**
* @brief Default virtual destructor
*/

View File

@@ -497,6 +497,11 @@ public:
return newAPI;
}
ie::RemoteContext::Ptr GetDefaultContext(const std::string& deviceName) override {
auto parsed = ov::parseDeviceNameIntoConfig(deviceName, ParamMap{});
return GetCPPPluginByName(parsed._deviceName).get_default_context(parsed._config)._ptr;
}
ov::SoPtr<ie::IExecutableNetworkInternal> LoadNetwork(const ie::CNNNetwork& network,
const std::shared_ptr<ie::RemoteContext>& context,
const std::map<std::string, std::string>& config) override {
@@ -1424,9 +1429,7 @@ RemoteContext::Ptr Core::GetDefaultContext(const std::string& deviceName) {
if (deviceName.find("AUTO") == 0) {
IE_THROW() << "AUTO device does not support remote context";
}
auto parsed = ov::parseDeviceNameIntoConfig(deviceName, ParamMap());
return _impl->GetCPPPluginByName(parsed._deviceName).get_default_context(parsed._config)._ptr;
return _impl->GetDefaultContext(deviceName);
}
void Core::AddExtension(IExtensionPtr extension, const std::string& deviceName_) {
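
With the delegation above, the public Core::GetDefaultContext and internal callers (such as the AUTO plugin) now share one code path in CoreImpl. A short usage sketch follows; the device name is illustrative, and the AUTO restriction is unchanged by this commit:

#include <ie_core.hpp>

int main() {
    InferenceEngine::Core core;
    // Resolves the named plugin and returns its default remote context.
    auto ctx = core.GetDefaultContext("GPU");
    // AUTO still rejects the call, as in the check kept above:
    // core.GetDefaultContext("AUTO");  // throws "AUTO device does not support remote context"
    return ctx ? 0 : 1;
}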

View File

@@ -557,16 +557,24 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create
auto num = _numRequestsCreated++;
size_t sum = 0;
InferenceEngine::SoIInferRequestInternal request_to_share_blobs_with;
InferenceEngine::RemoteContext::Ptr ctx = nullptr;
if (_workModeIsAUTO) {
if (!_loadContext[CPU].isEnabled && _loadContext[ACTUALDEVICE].isAlready) {
auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
if (num < dev_requests.size()) {
request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
try {
ctx = GetCore()->GetDefaultContext(_loadContext[ACTUALDEVICE].deviceInfo.deviceName);
} catch (InferenceEngine::Exception& ex) {
// plugin does not support remote context (e.g. CPU)
LOG_DEBUG("[AUTOPLUGIN]context not supported for %s, fallback to default memory",
_loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str());
// for dynamic shape support
auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
if (num < dev_requests.size()) {
request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
}
}
}
// if the user creates more infer requests than the device's optimal value, fall back to default memory
return std::make_shared<MultiDeviceInferRequest>(inputs, outputs, request_to_share_blobs_with);
return std::make_shared<MultiDeviceInferRequest>(inputs, outputs, request_to_share_blobs_with, ctx);
}
// borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests
@@ -587,16 +595,23 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create
auto num = _numRequestsCreated++;
size_t sum = 0;
InferenceEngine::SoIInferRequestInternal request_to_share_blobs_with;
InferenceEngine::RemoteContext::Ptr ctx = nullptr;
if (_workModeIsAUTO) {
if (!_loadContext[CPU].isEnabled && _loadContext[ACTUALDEVICE].isAlready) {
auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
if (num < dev_requests.size()) {
request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
try {
ctx = GetCore()->GetDefaultContext(_loadContext[ACTUALDEVICE].deviceInfo.deviceName);
} catch (InferenceEngine::Exception& ex) {
// plugin does not support remote context (e.g. CPU)
LOG_DEBUG("[AUTOPLUGIN]context not supported for %s, fallback to default memory",
_loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str());
auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
if (num < dev_requests.size()) {
request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
}
}
}
// if the user creates more infer requests than the device's optimal value, fall back to default memory
return std::make_shared<MultiDeviceInferRequest>(networkInputs, networkOutputs, request_to_share_blobs_with);
return std::make_shared<MultiDeviceInferRequest>(networkInputs, networkOutputs, request_to_share_blobs_with, ctx);
}
// borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests
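
Both CreateInferRequestImpl overloads above follow the same decision: when only the actual device is loaded, try to fetch its default context for host-blob allocation; if the plugin throws (e.g. CPU exposes no remote context), keep the previous behavior of sharing blobs with a worker request, which dynamic-shape models rely on. A sketch of that decision, written against the public Core API rather than the internal ICore the plugin actually holds; the function name is an assumption:

#include <ie_core.hpp>

// Returns the device's default remote context, or nullptr when the plugin has none,
// signalling the caller to fall back to sharing blobs with a worker infer request.
InferenceEngine::RemoteContext::Ptr TryGetDefaultContext(InferenceEngine::Core& core,
                                                         const std::string& deviceName) {
    try {
        return core.GetDefaultContext(deviceName);  // e.g. "GPU"
    } catch (const InferenceEngine::Exception&) {
        return nullptr;                             // e.g. "CPU": no remote context available
    }
}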

View File

@@ -16,19 +16,22 @@ using namespace InferenceEngine;
// ------------------------------MultiDeviceInferRequest----------------------------
MultiDeviceInferRequest::MultiDeviceInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with)
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx)
: IInferRequestInternal(inputs, outputs) {
CreateInferRequest(request_to_share_blobs_with);
CreateInferRequest(request_to_share_blobs_with, ctx);
}
MultiDeviceInferRequest::MultiDeviceInferRequest(const InputsDataMap& networkInputs,
const OutputsDataMap& networkOutputs,
const SoIInferRequestInternal & request_to_share_blobs_with)
const SoIInferRequestInternal & request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx)
: IInferRequestInternal(networkInputs, networkOutputs) {
CreateInferRequest(request_to_share_blobs_with);
CreateInferRequest(request_to_share_blobs_with, ctx);
}
void MultiDeviceInferRequest::CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with) {
void MultiDeviceInferRequest::CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx) {
if (request_to_share_blobs_with) {
// borrow device-friendly blobs from the request
for (const auto &it : _networkInputs)
@@ -39,22 +42,30 @@ void MultiDeviceInferRequest::CreateInferRequest(const InferenceEngine::SoIInfer
}
// Allocate all input blobs
for (const auto &it : _networkInputs) {
Layout l = it.second->getLayout();
Precision p = it.second->getPrecision();
SizeVector dims = it.second->getTensorDesc().getDims();
auto l = it.second->getLayout();
auto p = it.second->getPrecision();
auto dims = it.second->getTensorDesc().getDims();
TensorDesc desc = TensorDesc(p, dims, l);
_inputs[it.first] = make_blob_with_precision(desc);
if (ctx) {
_inputs[it.first] = ctx->CreateHostBlob(desc);
} else {
_inputs[it.first] = make_blob_with_precision(desc);
}
_inputs[it.first]->allocate();
}
// Allocate all output blobs
for (const auto &it : _networkOutputs) {
Layout l = it.second->getLayout();
Precision p = it.second->getPrecision();
SizeVector dims = it.second->getTensorDesc().getDims();
auto l = it.second->getLayout();
auto p = it.second->getPrecision();
auto dims = it.second->getTensorDesc().getDims();
TensorDesc desc = TensorDesc(p, dims, l);
_outputs[it.first] = make_blob_with_precision(desc);
if (ctx) {
_outputs[it.first] = ctx->CreateHostBlob(desc);
} else {
_outputs[it.first] = make_blob_with_precision(desc);
}
_outputs[it.first]->allocate();
}
}

View File

@@ -15,6 +15,7 @@
#include <memory>
#include <string>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#include "ie_remote_context.hpp"
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
@@ -30,17 +31,20 @@ public:
using Ptr = std::shared_ptr<MultiDeviceInferRequest>;
explicit MultiDeviceInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with);
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx = nullptr);
explicit MultiDeviceInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with);
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx = nullptr);
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
void InferImpl() override;
// Multi-Device impl specific: sets the data (blobs from the device-less requests to the specific device request)
void SetBlobsToAnotherRequest(const InferenceEngine::SoIInferRequestInternal& req);
private:
void CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with);
void CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx);
};
} // namespace MultiDevicePlugin
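
Because ctx defaults to nullptr in both constructors, existing call sites that pass no context keep compiling and behave exactly as before; only the AUTO path that obtained a context takes the new branch. Illustrative call shapes (the identifiers are the ones used in the diff, not a compilable unit on their own):

// Pre-existing form: no context, blobs come from make_blob_with_precision.
auto req_plain = std::make_shared<MultiDevicePlugin::MultiDeviceInferRequest>(
    networkInputs, networkOutputs, request_to_share_blobs_with);
// New form: host blobs are created from the device's default remote context.
auto req_ctx = std::make_shared<MultiDevicePlugin::MultiDeviceInferRequest>(
    networkInputs, networkOutputs, request_to_share_blobs_with, ctx);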

View File

@@ -38,6 +38,7 @@ public:
MOCK_CONST_METHOD1(DeviceSupportsImportExport, bool(const std::string&)); // NOLINT not a cast to bool
MOCK_METHOD2(GetSupportedConfig, std::map<std::string, std::string>(const std::string&, const std::map<std::string, std::string>&));
MOCK_CONST_METHOD0(isNewAPI, bool());
MOCK_METHOD1(GetDefaultContext, InferenceEngine::RemoteContext::Ptr(const std::string&));
~MockICore() = default;
};