allocate host blob from hw context (#9854)

* allocate host blob from hw context

Signed-off-by: fishbell <bell.song@intel.com>

* accommodate new namespace

Signed-off-by: fishbell <bell.song@intel.com>

* fix build

Signed-off-by: fishbell <bell.song@intel.com>

* refine logic

Signed-off-by: fishbell <bell.song@intel.com>

* dynamic shape needs to share blobs with the CPU infer request

Signed-off-by: fishbell <bell.song@intel.com>

* reuse icore from core

Signed-off-by: fishbell <bell.song@intel.com>

* CPU, GPU cases fall back to original code to fix CI failure

Signed-off-by: fishbell <bell.song@intel.com>
song, bell 2022-01-27 13:52:25 +08:00 committed by GitHub
parent aa518282b0
commit 49c2086d23
6 changed files with 70 additions and 29 deletions
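
The essence of the change, condensed: when the target device exposes a default remote context, the user-facing input/output blobs are allocated from that context instead of plain host memory. Below is a minimal sketch of that allocation branch using the same calls as the diff; the helper name and includes are assumptions, not part of the commit:

#include <ie_remote_context.hpp>
#include <blob_factory.hpp>  // make_blob_with_precision, the pre-existing allocation helper

// Hypothetical helper illustrating the new allocation path.
InferenceEngine::Blob::Ptr AllocateBlob(const InferenceEngine::TensorDesc& desc,
                                        const InferenceEngine::RemoteContext::Ptr& ctx) {
    InferenceEngine::Blob::Ptr blob;
    if (ctx) {
        blob = ctx->CreateHostBlob(desc);       // host memory allocated via the hw context
    } else {
        blob = make_blob_with_precision(desc);  // original plain host allocation
    }
    blob->allocate();
    return blob;
}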

View File

@@ -180,6 +180,13 @@ public:
virtual bool isNewAPI() const = 0;
/**
* @brief Get a pointer to default shared context object for the specified device.
* @param deviceName - A name of a device to get the default shared context from.
* @return A shared pointer to a default remote context.
*/
virtual RemoteContext::Ptr GetDefaultContext(const std::string& deviceName) = 0;
/**
* @brief Default virtual destructor
*/

View File

@@ -497,6 +497,11 @@ public:
return newAPI;
}
ie::RemoteContext::Ptr GetDefaultContext(const std::string& deviceName) override {
auto parsed = ov::parseDeviceNameIntoConfig(deviceName, ParamMap{});
return GetCPPPluginByName(parsed._deviceName).get_default_context(parsed._config)._ptr;
}
ov::SoPtr<ie::IExecutableNetworkInternal> LoadNetwork(const ie::CNNNetwork& network,
const std::shared_ptr<ie::RemoteContext>& context,
const std::map<std::string, std::string>& config) override {
@@ -1424,9 +1429,7 @@ RemoteContext::Ptr Core::GetDefaultContext(const std::string& deviceName) {
if (deviceName.find("AUTO") == 0) {
IE_THROW() << "AUTO device does not support remote context";
}
auto parsed = ov::parseDeviceNameIntoConfig(deviceName, ParamMap());
return _impl->GetCPPPluginByName(parsed._deviceName).get_default_context(parsed._config)._ptr;
return _impl->GetDefaultContext(deviceName);
}
void Core::AddExtension(IExtensionPtr extension, const std::string& deviceName_) {
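
With the delegation above, the public Core::GetDefaultContext and internal callers (such as the AUTO plugin) now share one code path in CoreImpl. A short usage sketch follows; the device name is illustrative, and the AUTO restriction is unchanged by this commit:

#include <ie_core.hpp>

int main() {
    InferenceEngine::Core core;
    // Resolves the named plugin and returns its default remote context.
    auto ctx = core.GetDefaultContext("GPU");
    // AUTO still rejects the call, as in the check kept above:
    // core.GetDefaultContext("AUTO");  // throws "AUTO device does not support remote context"
    return ctx ? 0 : 1;
}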

View File

@@ -557,16 +557,24 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create
auto num = _numRequestsCreated++;
size_t sum = 0;
InferenceEngine::SoIInferRequestInternal request_to_share_blobs_with;
InferenceEngine::RemoteContext::Ptr ctx = nullptr;
if (_workModeIsAUTO) {
if (!_loadContext[CPU].isEnabled && _loadContext[ACTUALDEVICE].isAlready) {
auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
if (num < dev_requests.size()) {
request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
try {
ctx = GetCore()->GetDefaultContext(_loadContext[ACTUALDEVICE].deviceInfo.deviceName);
} catch (InferenceEngine::Exception& ex) {
// plugin does not support remote context (e.g. CPU)
LOG_DEBUG("[AUTOPLUGIN]context not supported for %s, fallback to default memory",
_loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str());
// for dynamic shape support
auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
if (num < dev_requests.size()) {
request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
}
}
}
// if the user creates more infer requests than the device's optimal value, fall back to default memory
return std::make_shared<MultiDeviceInferRequest>(inputs, outputs, request_to_share_blobs_with);
return std::make_shared<MultiDeviceInferRequest>(inputs, outputs, request_to_share_blobs_with, ctx);
}
// borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests
@@ -587,16 +595,23 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create
auto num = _numRequestsCreated++;
size_t sum = 0;
InferenceEngine::SoIInferRequestInternal request_to_share_blobs_with;
InferenceEngine::RemoteContext::Ptr ctx = nullptr;
if (_workModeIsAUTO) {
if (!_loadContext[CPU].isEnabled && _loadContext[ACTUALDEVICE].isAlready) {
auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
if (num < dev_requests.size()) {
request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
try {
ctx = GetCore()->GetDefaultContext(_loadContext[ACTUALDEVICE].deviceInfo.deviceName);
} catch (InferenceEngine::Exception& ex) {
// plugin does not support remote context (e.g. CPU)
LOG_DEBUG("[AUTOPLUGIN]context not supported for %s, fallback to default memory",
_loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str());
auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
if (num < dev_requests.size()) {
request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
}
}
}
// if the user creates more infer requests than the device's optimal value, fall back to default memory
return std::make_shared<MultiDeviceInferRequest>(networkInputs, networkOutputs, request_to_share_blobs_with);
return std::make_shared<MultiDeviceInferRequest>(networkInputs, networkOutputs, request_to_share_blobs_with, ctx);
}
// borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests
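
Both CreateInferRequestImpl overloads above follow the same decision: when only the actual device is loaded, try to fetch its default context for host-blob allocation; if the plugin throws (e.g. CPU exposes no remote context), keep the previous behavior of sharing blobs with a worker request, which dynamic-shape models rely on. A sketch of that decision, written against the public Core API rather than the internal ICore the plugin actually holds; the function name is an assumption:

#include <ie_core.hpp>

// Returns the device's default remote context, or nullptr when the plugin has none,
// signalling the caller to fall back to sharing blobs with a worker infer request.
InferenceEngine::RemoteContext::Ptr TryGetDefaultContext(InferenceEngine::Core& core,
                                                         const std::string& deviceName) {
    try {
        return core.GetDefaultContext(deviceName);  // e.g. "GPU"
    } catch (const InferenceEngine::Exception&) {
        return nullptr;                             // e.g. "CPU": no remote context available
    }
}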

View File

@@ -16,19 +16,22 @@ using namespace InferenceEngine;
// ------------------------------MultiDeviceInferRequest----------------------------
MultiDeviceInferRequest::MultiDeviceInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with)
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx)
: IInferRequestInternal(inputs, outputs) {
CreateInferRequest(request_to_share_blobs_with);
CreateInferRequest(request_to_share_blobs_with, ctx);
}
MultiDeviceInferRequest::MultiDeviceInferRequest(const InputsDataMap& networkInputs,
const OutputsDataMap& networkOutputs,
const SoIInferRequestInternal & request_to_share_blobs_with)
const SoIInferRequestInternal & request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx)
: IInferRequestInternal(networkInputs, networkOutputs) {
CreateInferRequest(request_to_share_blobs_with);
CreateInferRequest(request_to_share_blobs_with, ctx);
}
void MultiDeviceInferRequest::CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with) {
void MultiDeviceInferRequest::CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx) {
if (request_to_share_blobs_with) {
// borrow device-friendly blobs from the request
for (const auto &it : _networkInputs)
@@ -39,22 +42,30 @@ void MultiDeviceInferRequest::CreateInferRequest(const InferenceEngine::SoIInfer
}
// Allocate all input blobs
for (const auto &it : _networkInputs) {
Layout l = it.second->getLayout();
Precision p = it.second->getPrecision();
SizeVector dims = it.second->getTensorDesc().getDims();
auto l = it.second->getLayout();
auto p = it.second->getPrecision();
auto dims = it.second->getTensorDesc().getDims();
TensorDesc desc = TensorDesc(p, dims, l);
_inputs[it.first] = make_blob_with_precision(desc);
if (ctx) {
_inputs[it.first] = ctx->CreateHostBlob(desc);
} else {
_inputs[it.first] = make_blob_with_precision(desc);
}
_inputs[it.first]->allocate();
}
// Allocate all output blobs
for (const auto &it : _networkOutputs) {
Layout l = it.second->getLayout();
Precision p = it.second->getPrecision();
SizeVector dims = it.second->getTensorDesc().getDims();
auto l = it.second->getLayout();
auto p = it.second->getPrecision();
auto dims = it.second->getTensorDesc().getDims();
TensorDesc desc = TensorDesc(p, dims, l);
_outputs[it.first] = make_blob_with_precision(desc);
if (ctx) {
_outputs[it.first] = ctx->CreateHostBlob(desc);
} else {
_outputs[it.first] = make_blob_with_precision(desc);
}
_outputs[it.first]->allocate();
}
}

View File

@@ -15,6 +15,7 @@
#include <memory>
#include <string>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#include "ie_remote_context.hpp"
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
@@ -30,17 +31,20 @@ public:
using Ptr = std::shared_ptr<MultiDeviceInferRequest>;
explicit MultiDeviceInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with);
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx = nullptr);
explicit MultiDeviceInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with);
const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx = nullptr);
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
void InferImpl() override;
// Multi-Device impl specific: sets the data (blobs from the device-less requests to the specific device request)
void SetBlobsToAnotherRequest(const InferenceEngine::SoIInferRequestInternal& req);
private:
void CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with);
void CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with,
InferenceEngine::RemoteContext::Ptr ctx);
};
} // namespace MultiDevicePlugin
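
Because ctx defaults to nullptr in both constructors, existing call sites that pass no context keep compiling and behave exactly as before; only the AUTO path that obtained a context takes the new branch. Illustrative call shapes (the identifiers are the ones used in the diff, not a compilable unit on their own):

// Pre-existing form: no context, blobs come from make_blob_with_precision.
auto req_plain = std::make_shared<MultiDevicePlugin::MultiDeviceInferRequest>(
    networkInputs, networkOutputs, request_to_share_blobs_with);
// New form: host blobs are created from the device's default remote context.
auto req_ctx = std::make_shared<MultiDevicePlugin::MultiDeviceInferRequest>(
    networkInputs, networkOutputs, request_to_share_blobs_with, ctx);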

View File

@@ -38,6 +38,7 @@ public:
MOCK_CONST_METHOD1(DeviceSupportsImportExport, bool(const std::string&)); // NOLINT not a cast to bool
MOCK_METHOD2(GetSupportedConfig, std::map<std::string, std::string>(const std::string&, const std::map<std::string, std::string>&));
MOCK_CONST_METHOD0(isNewAPI, bool());
MOCK_METHOD1(GetDefaultContext, InferenceEngine::RemoteContext::Ptr(const std::string&));
~MockICore() = default;
};