allocate host blob from hw context (#9854)
* allocate host blob from hw context
* accommodate to new namespace
* fix build
* refine logic
* dynamic shape needs to share blob with cpu infer request
* reuse ICore from Core
* CPU/GPU cases fall back to original code (CI failure)

Signed-off-by: fishbell <bell.song@intel.com>
parent aa518282b0
commit 49c2086d23
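In short, the patch lets the AUTO plugin allocate its host input/output blobs from the actual device's default remote context when one exists, instead of always using plain host memory. A minimal sketch of that decision, assuming a `desc` and `ctx` obtained as in the diff below (helper name and include paths are illustrative, not part of the patch):

    #include <ie_blob.h>
    #include <ie_remote_context.hpp>
    #include <blob_factory.hpp>  // make_blob_with_precision (plugin API header, path assumed)

    // Hypothetical helper mirroring the new allocation path in MultiDeviceInferRequest.
    InferenceEngine::Blob::Ptr AllocateHostBlob(const InferenceEngine::TensorDesc& desc,
                                                const InferenceEngine::RemoteContext::Ptr& ctx) {
        InferenceEngine::Blob::Ptr blob;
        if (ctx) {
            // Host blob backed by the hardware context (e.g. GPU USM host memory).
            blob = ctx->CreateHostBlob(desc);
        } else {
            // No remote context available (e.g. CPU): fall back to a regular host blob.
            blob = make_blob_with_precision(desc);
        }
        blob->allocate();
        return blob;
    }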
@@ -180,6 +180,13 @@ public:
     virtual bool isNewAPI() const = 0;
 
+    /**
+     * @brief Get a pointer to the default shared context object for the specified device.
+     * @param deviceName - A name of a device to get a default shared context from.
+     * @return A shared pointer to a default remote context.
+     */
+    virtual RemoteContext::Ptr GetDefaultContext(const std::string& deviceName) = 0;
+
     /**
      * @brief Default virtual destructor
      */
@@ -497,6 +497,11 @@ public:
         return newAPI;
     }
 
+    ie::RemoteContext::Ptr GetDefaultContext(const std::string& deviceName) override {
+        auto parsed = ov::parseDeviceNameIntoConfig(deviceName, ParamMap{});
+        return GetCPPPluginByName(parsed._deviceName).get_default_context(parsed._config)._ptr;
+    }
+
     ov::SoPtr<ie::IExecutableNetworkInternal> LoadNetwork(const ie::CNNNetwork& network,
                                                           const std::shared_ptr<ie::RemoteContext>& context,
                                                           const std::map<std::string, std::string>& config) override {
@@ -1424,9 +1429,7 @@ RemoteContext::Ptr Core::GetDefaultContext(const std::string& deviceName) {
     if (deviceName.find("AUTO") == 0) {
         IE_THROW() << "AUTO device does not support remote context";
     }
-
-    auto parsed = ov::parseDeviceNameIntoConfig(deviceName, ParamMap());
-    return _impl->GetCPPPluginByName(parsed._deviceName).get_default_context(parsed._config)._ptr;
+    return _impl->GetDefaultContext(deviceName);
 }
 
 void Core::AddExtension(IExtensionPtr extension, const std::string& deviceName_) {
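With the new `ICore::GetDefaultContext` entry point, `Core::GetDefaultContext` above simply delegates to the implementation. A hedged application-side sketch of the API this preserves (device name and tensor shape are assumptions, not from the patch):

    #include <ie_core.hpp>

    int main() {
        InferenceEngine::Core ie;
        // AUTO itself still throws here (see the hunk above); a real device such as "GPU" is assumed.
        auto ctx = ie.GetDefaultContext("GPU");
        InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32,
                                         {1, 3, 224, 224},
                                         InferenceEngine::Layout::NCHW);
        auto blob = ctx->CreateHostBlob(desc);  // host blob allocated from the device context
        blob->allocate();
        return 0;
    }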
@@ -557,16 +557,24 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create
     auto num = _numRequestsCreated++;
     size_t sum = 0;
     InferenceEngine::SoIInferRequestInternal request_to_share_blobs_with;
+    InferenceEngine::RemoteContext::Ptr ctx = nullptr;
 
     if (_workModeIsAUTO) {
         if (!_loadContext[CPU].isEnabled && _loadContext[ACTUALDEVICE].isAlready) {
-            auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
-            if (num < dev_requests.size()) {
-                request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
+            try {
+                ctx = GetCore()->GetDefaultContext(_loadContext[ACTUALDEVICE].deviceInfo.deviceName);
+            } catch (InferenceEngine::Exception& ex) {
+                // plugin does not support context, say CPU
+                LOG_DEBUG("[AUTOPLUGIN]context not supported for %s, fallback to default memory",
+                          _loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str());
+                // for dynamic shape support
+                auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
+                if (num < dev_requests.size()) {
+                    request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
+                }
             }
         }
         // if user creates more infer request than the device optimal value, fall back to default memory
-        return std::make_shared<MultiDeviceInferRequest>(inputs, outputs, request_to_share_blobs_with);
+        return std::make_shared<MultiDeviceInferRequest>(inputs, outputs, request_to_share_blobs_with, ctx);
     }
 
     // borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests
@@ -587,16 +595,23 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create
     auto num = _numRequestsCreated++;
     size_t sum = 0;
     InferenceEngine::SoIInferRequestInternal request_to_share_blobs_with;
+    InferenceEngine::RemoteContext::Ptr ctx = nullptr;
 
     if (_workModeIsAUTO) {
         if (!_loadContext[CPU].isEnabled && _loadContext[ACTUALDEVICE].isAlready) {
-            auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
-            if (num < dev_requests.size()) {
-                request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
+            try {
+                ctx = GetCore()->GetDefaultContext(_loadContext[ACTUALDEVICE].deviceInfo.deviceName);
+            } catch (InferenceEngine::Exception& ex) {
+                // plugin does not support context
+                LOG_DEBUG("[AUTOPLUGIN]context not supported for %s, fallback to default memory",
+                          _loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str());
+                auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
+                if (num < dev_requests.size()) {
+                    request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
+                }
             }
         }
         // if user creates more infer request than the device optimal value, fall back to default memory
-        return std::make_shared<MultiDeviceInferRequest>(networkInputs, networkOutputs, request_to_share_blobs_with);
+        return std::make_shared<MultiDeviceInferRequest>(networkInputs, networkOutputs, request_to_share_blobs_with, ctx);
     }
 
     // borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests
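Both overloads above use the same probe-and-fall-back pattern; extracted here as a hypothetical standalone helper (not part of the patch) to make the control flow explicit:

    // If the plugin behind the actual device does not expose a default remote context
    // (CPU, for example), GetDefaultContext() throws and the caller keeps ctx == nullptr,
    // which later selects make_blob_with_precision() and/or blob sharing with the device request.
    static InferenceEngine::RemoteContext::Ptr TryGetDefaultContext(
            const std::shared_ptr<InferenceEngine::ICore>& core,
            const std::string& deviceName) {
        try {
            return core->GetDefaultContext(deviceName);
        } catch (const InferenceEngine::Exception&) {
            return nullptr;
        }
    }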
@@ -16,19 +16,22 @@ using namespace InferenceEngine;
 // ------------------------------MultiDeviceInferRequest----------------------------
 MultiDeviceInferRequest::MultiDeviceInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
                                                  const std::vector<std::shared_ptr<const ov::Node>>& outputs,
-                                                 const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with)
+                                                 const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with,
+                                                 InferenceEngine::RemoteContext::Ptr ctx)
     : IInferRequestInternal(inputs, outputs) {
-    CreateInferRequest(request_to_share_blobs_with);
+    CreateInferRequest(request_to_share_blobs_with, ctx);
 }
 
 MultiDeviceInferRequest::MultiDeviceInferRequest(const InputsDataMap& networkInputs,
                                                  const OutputsDataMap& networkOutputs,
-                                                 const SoIInferRequestInternal & request_to_share_blobs_with)
+                                                 const SoIInferRequestInternal & request_to_share_blobs_with,
+                                                 InferenceEngine::RemoteContext::Ptr ctx)
     : IInferRequestInternal(networkInputs, networkOutputs) {
-    CreateInferRequest(request_to_share_blobs_with);
+    CreateInferRequest(request_to_share_blobs_with, ctx);
 }
 
-void MultiDeviceInferRequest::CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with) {
+void MultiDeviceInferRequest::CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with,
+                                                 InferenceEngine::RemoteContext::Ptr ctx) {
     if (request_to_share_blobs_with) {
         // borrow device-friendly blobs from the request
         for (const auto &it : _networkInputs)
@@ -39,22 +42,30 @@ void MultiDeviceInferRequest::CreateInferRequest(const InferenceEngine::SoIInfer
     }
     // Allocate all input blobs
     for (const auto &it : _networkInputs) {
-        Layout l = it.second->getLayout();
-        Precision p = it.second->getPrecision();
-        SizeVector dims = it.second->getTensorDesc().getDims();
+        auto l = it.second->getLayout();
+        auto p = it.second->getPrecision();
+        auto dims = it.second->getTensorDesc().getDims();
 
         TensorDesc desc = TensorDesc(p, dims, l);
-        _inputs[it.first] = make_blob_with_precision(desc);
+        if (ctx) {
+            _inputs[it.first] = ctx->CreateHostBlob(desc);
+        } else {
+            _inputs[it.first] = make_blob_with_precision(desc);
+        }
         _inputs[it.first]->allocate();
     }
     // Allocate all output blobs
     for (const auto &it : _networkOutputs) {
-        Layout l = it.second->getLayout();
-        Precision p = it.second->getPrecision();
-        SizeVector dims = it.second->getTensorDesc().getDims();
+        auto l = it.second->getLayout();
+        auto p = it.second->getPrecision();
+        auto dims = it.second->getTensorDesc().getDims();
 
         TensorDesc desc = TensorDesc(p, dims, l);
-        _outputs[it.first] = make_blob_with_precision(desc);
+        if (ctx) {
+            _outputs[it.first] = ctx->CreateHostBlob(desc);
+        } else {
+            _outputs[it.first] = make_blob_with_precision(desc);
+        }
         _outputs[it.first]->allocate();
     }
 }
@@ -15,6 +15,7 @@
 #include <memory>
 #include <string>
 #include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
+#include "ie_remote_context.hpp"
 
 #ifdef MULTIUNITTEST
 #define MOCKTESTMACRO virtual
@@ -30,17 +31,20 @@ public:
     using Ptr = std::shared_ptr<MultiDeviceInferRequest>;
     explicit MultiDeviceInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
                                      const InferenceEngine::OutputsDataMap& networkOutputs,
-                                     const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with);
+                                     const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with,
+                                     InferenceEngine::RemoteContext::Ptr ctx = nullptr);
     explicit MultiDeviceInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
                                      const std::vector<std::shared_ptr<const ov::Node>>& outputs,
-                                     const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with);
+                                     const InferenceEngine::SoIInferRequestInternal & request_to_share_blobs_with,
+                                     InferenceEngine::RemoteContext::Ptr ctx = nullptr);
     std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
     void InferImpl() override;
     // Multi-Device impl specific: sets the data (blobs from the device-less requests to the specific device request)
     void SetBlobsToAnotherRequest(const InferenceEngine::SoIInferRequestInternal& req);
 
 private:
-    void CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with);
+    void CreateInferRequest(const InferenceEngine::SoIInferRequestInternal& request_to_share_blobs_with,
+                            InferenceEngine::RemoteContext::Ptr ctx);
 };
 
 } // namespace MultiDevicePlugin
@@ -38,6 +38,7 @@ public:
     MOCK_CONST_METHOD1(DeviceSupportsImportExport, bool(const std::string&)); // NOLINT not a cast to bool
     MOCK_METHOD2(GetSupportedConfig, std::map<std::string, std::string>(const std::string&, const std::map<std::string, std::string>&));
     MOCK_CONST_METHOD0(isNewAPI, bool());
+    MOCK_METHOD1(GetDefaultContext, InferenceEngine::RemoteContext::Ptr(const std::string&));
 
     ~MockICore() = default;
 };
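With GetDefaultContext added to MockICore, tests can stub the context lookup. A hedged GoogleMock sketch (test name, includes, and wiring are assumptions, not from this patch):

    #include <gmock/gmock.h>
    // #include "mock_icore.hpp"  // path to MockICore assumed

    using ::testing::_;
    using ::testing::Return;

    TEST(MultiDeviceInferRequestTest, FallsBackWithoutDefaultContext) {
        auto core = std::make_shared<MockICore>();
        // A null context makes the AUTO request take the make_blob_with_precision() path.
        EXPECT_CALL(*core, GetDefaultContext(_))
            .WillRepeatedly(Return(InferenceEngine::RemoteContext::Ptr{}));
        // ... pass `core` to the executable network under test and create an infer request ...
    }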