Runtime fallback to other devices (#16015)

* Runtime fallback to other devices

* Update properties.hpp

* Update infer callback in AUTO

* Avoid some hang cases

* Add test cases for AUTO runtime fallback

* Replace mockExecutor with ImmediateExecutor

* Update the runtime fallback logic

* Update test case and support the case that infer failed on CPU_HELP

* Update the test to detect whether to throw exception

* fix the error of CTPUT

* Add lock to AUTO executable network GetContext

* Update variable name in selectOtherDevice API

* Simplify variables and add testcase to improve test coverage

* Fix the issues when release CPU_HELP device and clean up the code

* Clean up code
This commit is contained in:
Wang Wangwang
2023-03-20 10:13:07 +08:00
committed by GitHub
parent b2a2266f60
commit 9c7f7b8338
12 changed files with 610 additions and 43 deletions

View File

@@ -19,9 +19,14 @@ namespace intel_auto {
static constexpr Property<bool> device_bind_buffer{"DEVICE_BIND_BUFFER"};
/**
* @brief auto/multi device setting that enable/disable CPU as acceleration (or helper device) at the beginning
* @brief auto device setting that enable/disable CPU as acceleration (or helper device) at the beginning
*/
static constexpr Property<bool> enable_startup_fallback{"ENABLE_STARTUP_FALLBACK"};
/**
* @brief auto device setting that enable/disable runtime fallback to other devices when infer fails on current
* selected device
*/
static constexpr Property<bool> enable_runtime_fallback{"ENABLE_RUNTIME_FALLBACK"};
} // namespace intel_auto
} // namespace ov

View File

@@ -17,8 +17,13 @@ AutoExecutableNetwork::AutoExecutableNetwork(AutoScheduleContext::Ptr& context,
}
std::shared_ptr<IE::RemoteContext> AutoExecutableNetwork::GetContext() const {
_autoSchedule->WaitActualNetworkReady();
return _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetContext();
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
if (_autoSchedule->_loadContext[FALLBACKDEVICE].isAlready) {
return _autoSchedule->_loadContext[FALLBACKDEVICE].executableNetwork->GetContext();
} else {
_autoSchedule->WaitActualNetworkReady();
return _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetContext();
}
}
void AutoExecutableNetwork::SetConfig(const std::map<std::string, IE::Parameter>

View File

@@ -79,25 +79,114 @@ void AutoSchedule::GenerateWorkers(const std::string& device,
IdleGuard<NotBusyPriorityWorkerRequests> idleGuard{workerRequestPtr, *idleWorkerRequestsPtr};
workerRequestPtr->_exceptionPtr = exceptionPtr;
{
auto capturedTask = std::move(workerRequestPtr->_task);
capturedTask();
}
// try to return the request to the idle list (fails if the overall object destruction has began)
if (idleGuard.Release()->try_push(std::make_pair(workerRequestPtr->_index, workerRequestPtr))) {
// let's try to pop a task, as we know there is at least one idle request, schedule if succeeded
// if no device-agnostic tasks, let's try pop the device specific task, schedule if succeeded
IE::Task t;
do {
_inferPipelineTasks.try_pop(t);
} while (t && ScheduleToWorkerInferRequest(std::move(t)));
do {
_inferPipelineTasksDeviceSpecific[device]->try_pop(t);
} while (t && ScheduleToWorkerInferRequest(std::move(t), device));
auto stopRetryAndContinue = [workerRequestPtr]() {
auto capturedTask = std::move(workerRequestPtr->_task);
capturedTask();
};
// will fallback to other devices if enable _runtimeFallback
if (workerRequestPtr->_exceptionPtr != nullptr && _autoSContext->_runtimeFallback) {
bool selectOtherDeviceFlag = false;
// select other device
try {
selectOtherDeviceFlag = selectOtherDevice(device);
} catch (const IE::Exception& iie) {
LOG_DEBUG_TAG("select other devices with error: %s", iie.what());
selectOtherDeviceFlag = false;
}
if (selectOtherDeviceFlag) {
// Add end time to current workerRequest and restart the task in pipeline
workerRequestPtr->_endTimes.push_back(std::chrono::steady_clock::now());
workerRequestPtr->_fallbackExec->_task();
} else {
// continue to run the task in pipeline
stopRetryAndContinue();
}
} else {
stopRetryAndContinue();
}
// try to return the request to the idle list (fails if the overall object destruction has began)
if (idleGuard.Release()->try_push(std::make_pair(workerRequestPtr->_index, workerRequestPtr))) {
// let's try to pop a task, as we know there is at least one idle request, schedule if succeeded
// if no device-agnostic tasks, let's try pop the device specific task, schedule if succeeded
IE::Task t;
do {
_inferPipelineTasks.try_pop(t);
} while (t && ScheduleToWorkerInferRequest(std::move(t)));
do {
_inferPipelineTasksDeviceSpecific[device]->try_pop(t);
} while (t && ScheduleToWorkerInferRequest(std::move(t), device));
}
}
});
}
}
// Runtime-fallback device reselection: called when inference failed on
// `currentDeviceName`. Removes the failing device from the priority list and
// tries to (re)load the network on the next best device via
// _loadContext[FALLBACKDEVICE]. Returns true when a usable fallback context is
// ready (or, for CPU_HELP, when the actual device can take over), false when
// no device is left to fall back to. Thread-safe via _fallbackMutex.
bool AutoSchedule::selectOtherDevice(const std::string& currentDeviceName) {
    {
        std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
        // a recursive function to select other devices
        std::function<bool(std::string)> getExecutionDevices;
        getExecutionDevices = [&](const std::string& deviceName) {
            std::string realDeviceName;
            bool isCPUHelp = false;
            // refresh the precision used for device selection (network mode only)
            if (_autoSContext->_modelPath.empty())
                _loadContext[FALLBACKDEVICE].networkPrecision = GetNetworkPrecision(_autoSContext->_network);
            if (deviceName == "CPU_HELP") {
                // if infer failed in CPU_HELP, we will remove CPU from _devicePriorities
                // and re-run infer request when _loadContext[ACTUALDEVICE] is ready
                realDeviceName = "CPU";
                isCPUHelp = true;
                WaitActualNetworkReady();
            } else {
                realDeviceName = deviceName;
            }
            // locate the failing device in the current priority list
            const auto CurrentDeviceIter = std::find_if(_autoSContext->_devicePriorities.begin(), _autoSContext->_devicePriorities.end(),
                [=](const DeviceInformation& d) -> bool {
                    return d.deviceName.find(realDeviceName) != std::string::npos;});
            if (CurrentDeviceIter != _autoSContext->_devicePriorities.end()) {
                if (_autoSContext->_devicePriorities.size() == 1) {
                    // nothing would remain after erasing — no fallback possible
                    LOG_INFO_TAG("No other devices in _devicePriorities");
                    return false;
                }
                _autoSContext->_devicePriorities.erase(CurrentDeviceIter);
                if (isCPUHelp) {
                    // actual device is ready (waited above); request can be re-run there
                    return true;
                }
            } else {
                // the device was already erased by a previous fallback attempt;
                // report whether that attempt produced a working context
                LOG_DEBUG_TAG("Already selected the fallback device");
                return _loadContext[FALLBACKDEVICE].isReloadSuccess ? true : false;
            }
            // reset the fallback context before loading on the newly selected device
            _loadContext[FALLBACKDEVICE].metaDevices = _autoSContext->_devicePriorities;
            _loadContext[FALLBACKDEVICE].isLoadSuccess = false;
            _loadContext[FALLBACKDEVICE].workName = "";
            _loadContext[FALLBACKDEVICE].isReloadSuccess = false;
            _loadContext[FALLBACKDEVICE].deviceInfo =
                _autoSContext->_plugin->SelectDevice(_autoSContext->_devicePriorities,
                                                     _loadContext[FALLBACKDEVICE].networkPrecision,
                                                     _autoSContext->_modelPriority);
            try {
                // synchronously load the network on the fallback device
                _loadContext[FALLBACKDEVICE].task();
                // FALLBACKDEVICE need to be load again if infer failed, so reset promise here
                _loadContext[FALLBACKDEVICE].promise = {};
                _loadContext[FALLBACKDEVICE].future = _loadContext[FALLBACKDEVICE].promise.get_future();
            } catch (const IE::Exception& iie) {
                LOG_DEBUG_TAG("Load context in FALLBACKDEVICE with error: %s", iie.what());
            }
            if (_loadContext[FALLBACKDEVICE].isReloadSuccess) {
                // fallback context now owns execution; disable the former actual device
                _loadContext[ACTUALDEVICE].isEnabled = false;
                _loadContext[ACTUALDEVICE].isLoadSuccess = false;
                _loadContext[ACTUALDEVICE].isAlready = false;
                LOG_INFO_TAG("Select fallback device:%s", _loadContext[FALLBACKDEVICE].deviceInfo.deviceName.c_str());
                return true;
            } else {
                // load failed or generate works failed, so reselect other devices
                return getExecutionDevices(_loadContext[FALLBACKDEVICE].deviceInfo.deviceName.c_str());
            }
        };
        return getExecutionDevices(currentDeviceName);
    }
}
void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
_LogTag = sContext->_LogTag;
LOG_INFO_TAG("ExecutableNetwork start");
@@ -116,6 +205,9 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
// loadContext[ACTUALDEVICE] is always enabled,
// when there is CPU and there are more than two devices, loadContext[CPU] is enabled
_loadContext[ACTUALDEVICE].isEnabled = true;
if (_autoSContext->_runtimeFallback) {
_loadContext[FALLBACKDEVICE].isEnabled = true;
}
if (_autoSContext->_modelPath.empty())
_loadContext[ACTUALDEVICE].networkPrecision = GetNetworkPrecision(_autoSContext->_network);
_loadContext[ACTUALDEVICE].metaDevices = _autoSContext->_devicePriorities;
@@ -179,7 +271,7 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
// initialize the rest members of load context
for (int i = 0; i < CONTEXTNUM; i++) {
if (_loadContext[i].isEnabled) {
_loadContext[i].future = _loadContext[i].promise.get_future();
_loadContext[i].future = _loadContext[i].promise.get_future();
auto* contextPtr = &_loadContext[i];
auto modelPath = _autoSContext->_modelPath;
auto network = _autoSContext->_network;
@@ -197,12 +289,14 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
_autoSContext->_config.insert(contextPtr->deviceInfo.config.begin(), contextPtr->deviceInfo.config.end());
}
contextPtr->isAlready = true;
// reloadsuccess flag only for _loadContext[FALLBACKDEVICE]
contextPtr->isReloadSuccess = true;
auto& deviceName = contextPtr->deviceInfo.deviceName;
LOG_INFO_TAG("device:%s loading Network finished", deviceName.c_str());
if (!isCumulative) {
auto supported_config_keys =
_autoSContext->_core->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS))
.as<std::vector<std::string>>();
.as<std::vector<std::string>>();
DEBUG_RUN([this, &contextPtr, &deviceName, &supported_config_keys] {
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
for (const auto& cfg : supported_config_keys) {
@@ -256,8 +350,14 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
_loadContext[CPU].future.wait();
// clean up helper infer requests
// first, wait for all the remaining requests to finish
for (auto& iter : _workerRequests["CPU_HELP"]) {
iter._inferRequest._ptr->Wait(IE::InferRequest::WaitMode::RESULT_READY);
if (!_autoSContext->_runtimeFallback) {
for (auto& iter : _workerRequests["CPU_HELP"]) {
try {
iter._inferRequest._ptr->Wait(IE::InferRequest::WaitMode::RESULT_READY);
} catch (const IE::Exception& iie) {
LOG_DEBUG_TAG("No infer results expected, infer in CPU_HELP throw some errors: %s", iie.what());
}
}
}
// late enough to check the idle queue now
// second, check the idle queue if all requests are in place
@@ -301,6 +401,15 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
}
};
_executor->run(std::move(recycleTask));
} else if (_autoSContext->_devicePriorities.size() != 1 && !isCumulative && _autoSContext->_runtimeFallback) {
// The performance will drop somewhat compared with _passthroughExeNet when ENABLE_RUNTIME_FALLBACK is enabled
for (auto&& device : _autoSContext->_devicePriorities) {
// initialize containers before run async task
_idleWorkerRequests[device.deviceName];
_workerRequests[device.deviceName];
_inferPipelineTasksDeviceSpecific[device.deviceName] = nullptr;
}
_loadContext[ACTUALDEVICE].task();
} else {
// only one device need to load network, do not need to load it async
_loadContext[ACTUALDEVICE].task();
@@ -412,13 +521,13 @@ void AutoSchedule::WaitFirstNetworkReady() {
_firstLoadFuture.wait();
}
// check if there is any device that have loaded network successfully
for (int i = CONTEXTNUM - 1; i >= 0; i--) {
for (int i = CONTEXTNUM - 2; i >= 0; i--) {
if (_loadContext[i].isEnabled && _loadContext[i].isAlready) {
return;
}
}
// the first loading is failed, wait for another loading
for (int i = CONTEXTNUM - 1; i >= 0; i--) {
for (int i = CONTEXTNUM - 2; i >= 0; i--) {
if (_loadContext[i].isEnabled) {
_loadContext[i].future.wait();
// check if loading is successful
@@ -428,7 +537,7 @@ void AutoSchedule::WaitFirstNetworkReady() {
}
}
//print errMessage
for (int i = CONTEXTNUM - 1; i >= 0; i--) {
for (int i = CONTEXTNUM - 2; i >= 0; i--) {
if (_loadContext[i].isEnabled) {
LOG_ERROR_TAG("load failed, %s", _loadContext[i].errMessage.c_str());
}
@@ -460,14 +569,18 @@ bool AutoSchedule::ScheduleToWorkerInferRequest(IE::Task inferPipelineTask, Devi
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
} else {
// _acceleratorDevice could be the same as _cpuDevice, such as AUTO:CPU
if (_loadContext[ACTUALDEVICE].isAlready) {
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
if (_loadContext[FALLBACKDEVICE].isAlready) {
devices.push_back(_loadContext[FALLBACKDEVICE].deviceInfo);
} else {
// replace deviceName with workName, so schedule can select correct
// idleWorkerQueue
auto deviceInfo = _loadContext[CPU].deviceInfo;
deviceInfo.deviceName = _loadContext[CPU].workName;
devices.push_back(std::move(deviceInfo));
if (_loadContext[ACTUALDEVICE].isAlready) {
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
} else {
// replace deviceName with workName, so schedule can select correct
// idleWorkerQueue
auto deviceInfo = _loadContext[CPU].deviceInfo;
deviceInfo.deviceName = _loadContext[CPU].workName;
devices.push_back(std::move(deviceInfo));
}
}
}
for (auto&& device : devices) {

View File

@@ -18,6 +18,7 @@ struct AutoLoadContext {
std::atomic<bool> isEnabled = {false};
std::atomic<bool> isAlready = {false};
std::atomic<bool> isLoadSuccess = {false};
std::atomic<bool> isReloadSuccess = {false};
std::future<void> future;
std::promise<void> promise;
SoExecNetwork executableNetwork;
@@ -36,7 +37,8 @@ struct AutoLoadContext {
enum AutoLoadContextIndex {
CPU = 0,
ACTUALDEVICE = 1,
CONTEXTNUM = 2
FALLBACKDEVICE = 2,
CONTEXTNUM = 3
};
class AutoSchedule : public MultiSchedule {
public:
@@ -52,12 +54,15 @@ public:
protected:
void GenerateWorkers(const std::string& device, const SoExecNetwork& executableNetwork) override;
bool ScheduleToWorkerInferRequest(IE::Task, DeviceName preferred_device = "") override;
static bool RunPipelineTask(IE::Task& inferPipelineTask, NotBusyPriorityWorkerRequests& idleWorkerRequests, const DeviceName& preferred_device);
static bool RunPipelineTask(IE::Task& inferPipelineTask, NotBusyPriorityWorkerRequests& idleWorkerRequests,
const DeviceName& preferred_device);
DeviceMap<NotBusyPriorityWorkerRequests> _idleWorkerRequests;
private:
void WaitFirstNetworkReady();
void TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network);
bool selectOtherDevice(const std::string& currentDeviceName);
IE::Task releaseActualdeviceTask;
private:
IE::IStreamsExecutor::Ptr _executor;

View File

@@ -42,6 +42,26 @@ using Time = std::chrono::time_point<std::chrono::steady_clock>;
template<typename T>
using DeviceMap = std::unordered_map<DeviceName, T>;
// Executor that runs the submitted task synchronously on the caller's thread
// and keeps a copy of it, so the same task can be re-submitted later (the
// runtime-fallback path re-runs it via WorkerInferRequest::_fallbackExec).
struct MultiImmediateExecutor : public IE::ITaskExecutor {
public:
    /**
     * @brief A shared pointer to a MultiImmediateExecutor object
     */
    using Ptr = std::shared_ptr<MultiImmediateExecutor>;
    /**
     * @brief Destroys the object.
     */
    ~MultiImmediateExecutor() override = default;
    /**
     * @brief Stores the task and executes it immediately on the current thread.
     */
    void run(IE::Task task) override {
        _task = std::move(task);
        _task();
    }
    // Last submitted task; retained so it can be re-run on device fallback.
    InferenceEngine::Task _task;
};
struct DeviceInformation {
DeviceName deviceName;
std::map<std::string, std::string> config;
@@ -58,6 +78,7 @@ struct WorkerInferRequest {
std::list<Time> _startTimes;
std::list<Time> _endTimes;
int _index = 0;
MultiImmediateExecutor::Ptr _fallbackExec;
};
using NotBusyPriorityWorkerRequests = IE::ThreadSafeBoundedPriorityQueue<std::pair<int, WorkerInferRequest*>>;
@@ -124,6 +145,7 @@ public:
bool _batchingDisabled = {false};
bool _bindBuffer = false;
bool _startupfallback = true;
bool _runtimeFallback = true;
virtual ~MultiScheduleContext() = default;
};
@@ -137,6 +159,7 @@ public:
unsigned int _modelPriority = 0;
std::string _performanceHint;
std::mutex _confMutex;
std::mutex _fallbackMutex;
MultiDeviceInferencePlugin* _plugin;
virtual ~AutoScheduleContext() = default;
};

View File

@@ -54,10 +54,11 @@ Pipeline MultiSchedule::GetPipeline(const IInferPtr& syncInferRequest, WorkerInf
}
});
} else {
MultiImmediateExecutor::Ptr _firstExecutor = std::make_shared<MultiImmediateExecutor>();
pipeline = {
// if the request is coming with device-specific remote blobs make sure it is scheduled to the specific device only:
Stage {
/*TaskExecutor*/ std::make_shared<IE::ImmediateExecutor>(), /*task*/ [this, &syncInferRequest]() {
/*TaskExecutor*/ _firstExecutor, /*task*/ [this, &syncInferRequest]() {
// by default, no preferred device:
_thisPreferredDeviceName = "";
auto execNetwork = _multiSContext->_executableNetwork.lock();
@@ -96,13 +97,18 @@ Pipeline MultiSchedule::GetPipeline(const IInferPtr& syncInferRequest, WorkerInf
multiSyncInferRequest->SetBlobsToAnotherRequest(_thisWorkerInferRequest->_inferRequest);
INFO_RUN([workerInferRequest]() {
(*workerInferRequest)->_startTimes.push_back(std::chrono::steady_clock::now());
});
});
}},
// final task in the pipeline:
Stage {
/*TaskExecutor*/std::make_shared<ThisRequestExecutor>(workerInferRequest), /*task*/ [this, &syncInferRequest, workerInferRequest]() {
if (nullptr != (*workerInferRequest)->_exceptionPtr) {
std::rethrow_exception((*workerInferRequest)->_exceptionPtr);
/*TaskExecutor*/std::make_shared<ThisRequestExecutor>(workerInferRequest, _firstExecutor), /*task*/
[this, &syncInferRequest, workerInferRequest]() {
INFO_RUN([workerInferRequest]() {
(*workerInferRequest)->_endTimes.push_back(std::chrono::steady_clock::now());
});
std::exception_ptr eptr = (*workerInferRequest)->_exceptionPtr;
if (nullptr != eptr) {
std::rethrow_exception(eptr);
}
if (_multiSContext->_needPerfCounters) {
auto multiSyncInferRequest = std::dynamic_pointer_cast<MultiDeviceInferRequest>
@@ -110,9 +116,6 @@ Pipeline MultiSchedule::GetPipeline(const IInferPtr& syncInferRequest, WorkerInf
multiSyncInferRequest->_scheduledRequest =
(*workerInferRequest)->_inferRequest;
}
INFO_RUN([workerInferRequest]() {
(*workerInferRequest)->_endTimes.push_back(std::chrono::steady_clock::now());
});
}}
};
}

View File

@@ -16,12 +16,14 @@
namespace MultiDevicePlugin {
struct ThisRequestExecutor : public IE::ITaskExecutor {
explicit ThisRequestExecutor(WorkerInferRequest** ptr): _workptrptr{ptr} {}
explicit ThisRequestExecutor(WorkerInferRequest** ptr, MultiImmediateExecutor::Ptr executor = nullptr): _workptrptr{ptr}, _fallbackExec(executor) {}
void run(IE::Task task) override {
(*_workptrptr)->_task = std::move(task);
(*_workptrptr)->_fallbackExec = _fallbackExec;
(*_workptrptr)->_inferRequest->StartAsync();
};
WorkerInferRequest** _workptrptr = nullptr;
MultiImmediateExecutor::Ptr _fallbackExec;
};
class MultiSchedule : public Schedule, public IE::ITaskExecutor {
@@ -54,7 +56,6 @@ protected:
DeviceMap<std::unique_ptr<IE::ThreadSafeQueue<IE::Task>>> _inferPipelineTasksDeviceSpecific;
DeviceMap<NotBusyWorkerRequests> _idleWorkerRequests;
DeviceMap<std::vector<WorkerInferRequest>> _workerRequests;
mutable std::mutex _mutex;
std::atomic_size_t _numRequestsCreated = {0};
MultiScheduleContext::Ptr _multiSContext;
SoExecNetwork _passthroughExeNet;

View File

@@ -485,6 +485,7 @@ IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetworkImpl(cons
autoSContext->_LogTag = _LogTag;
autoSContext->_bindBuffer = loadConfig.get_property(ov::intel_auto::device_bind_buffer);
autoSContext->_startupfallback = loadConfig.get_property(ov::intel_auto::enable_startup_fallback);
autoSContext->_runtimeFallback = loadConfig.get_property(ov::intel_auto::enable_runtime_fallback);
return std::make_shared<AutoExecutableNetwork>(autoSContext, std::make_shared<AutoSchedule>());
}
OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, "MultiDeviceInferencePlugin::LoadNetworkImpl:MultiMode");

View File

@@ -26,6 +26,7 @@ void PluginConfig::set_default() {
std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::UNDEFINED),
std::make_tuple(ov::hint::num_requests, 0, UnsignedTypeValidator()),
std::make_tuple(ov::intel_auto::enable_startup_fallback, true),
std::make_tuple(ov::intel_auto::enable_runtime_fallback, true),
// TODO 1) cache_dir 2) allow_auto_batch 3) auto_batch_timeout
std::make_tuple(ov::cache_dir, ""),
std::make_tuple(ov::hint::allow_auto_batching, true),

View File

@@ -159,6 +159,9 @@ public:
multi_supported_configKeys.erase(std::remove(
multi_supported_configKeys.begin(), multi_supported_configKeys.end(), ov::intel_auto::enable_startup_fallback.name()),
multi_supported_configKeys.end());
multi_supported_configKeys.erase(std::remove(
multi_supported_configKeys.begin(), multi_supported_configKeys.end(), ov::intel_auto::enable_runtime_fallback.name()),
multi_supported_configKeys.end());
return pluginName == "AUTO" ? supported_configKeys : multi_supported_configKeys;
}
@@ -171,6 +174,9 @@ public:
multi_supported_properties.erase(std::remove(
multi_supported_properties.begin(), multi_supported_properties.end(), ov::intel_auto::enable_startup_fallback),
multi_supported_properties.end());
multi_supported_properties.erase(std::remove(
multi_supported_properties.begin(), multi_supported_properties.end(), ov::intel_auto::enable_runtime_fallback),
multi_supported_properties.end());
return pluginName == "AUTO" ? supported_properties : multi_supported_properties;
}

View File

@@ -0,0 +1,364 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <ngraph_functions/subgraph_builders.hpp>
#include <common_test_utils/test_constants.hpp>
#include <ie_metric_helpers.hpp>
#include "mock_common.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp"
#include "plugin/mock_auto_device_plugin.hpp"
#include "plugin/mock_infer_request.hpp"
using ::testing::Throw;
using ::testing::Matches;
using ::testing::_;
using ::testing::StrEq;
using ::testing::Return;
using ::testing::InvokeWithoutArgs;
using ::testing::NiceMock;
using namespace MockMultiDevice;
using Config = std::map<std::string, std::string>;
using ConfigParams = std::tuple<std::vector<std::tuple<std::string, bool>>, int, bool, bool, bool, bool>;
// Parameterized fixture for AUTO plugin runtime-fallback tests.
// ConfigParams layout: {list of (deviceName, inferShouldThrow)}, expected
// LoadNetwork call count, enableRuntimeFallback, expectThrow, loadNetworkFail,
// generateWorkersFail.
class AutoRuntimeFallback : public ::testing::TestWithParam<ConfigParams> {
public:
    std::shared_ptr<ngraph::Function> function;
    InferenceEngine::CNNNetwork cnnNet;
    std::shared_ptr<NiceMock<MockICore>> core;
    std::shared_ptr<NiceMock<MockMultiDeviceInferencePlugin>> plugin;
    // config for Auto device
    std::map<std::string, std::string> config;
    std::vector<DeviceInformation> metaDevices;
    // mock exeNetwork helpers, one per simulated device
    ov::SoPtr<IExecutableNetworkInternal> mockExeNetwork;
    ov::SoPtr<IExecutableNetworkInternal> mockExeNetworkGPU_0;
    ov::SoPtr<IExecutableNetworkInternal> mockExeNetworkGPU_1;
    ov::SoPtr<IExecutableNetworkInternal> mockExeNetworkVPUX;
    std::shared_ptr<NiceMock<MockIInferRequestInternal>> inferReqInternal;
    std::shared_ptr<NiceMock<MockIInferRequestInternal>> inferReqInternalGPU_0;
    std::shared_ptr<NiceMock<MockIInferRequestInternal>> inferReqInternalGPU_1;
    std::shared_ptr<NiceMock<MockIInferRequestInternal>> inferReqInternalVPUX;
    std::shared_ptr<NiceMock<MockIExecutableNetworkInternal>> mockIExeNet;
    std::shared_ptr<NiceMock<MockIExecutableNetworkInternal>> mockIExeNetGPU_0;
    std::shared_ptr<NiceMock<MockIExecutableNetworkInternal>> mockIExeNetGPU_1;
    std::shared_ptr<NiceMock<MockIExecutableNetworkInternal>> mockIExeNetVPUX;
    std::shared_ptr<mockAsyncInferRequest> mockInferrequest;
    std::shared_ptr<mockAsyncInferRequest> mockInferrequestGPU_0;
    std::shared_ptr<mockAsyncInferRequest> mockInferrequestGPU_1;
    std::shared_ptr<mockAsyncInferRequest> mockInferrequestVPUX;
    std::shared_ptr<ImmediateExecutor> mockExecutor;
    std::shared_ptr<ImmediateExecutor> mockExecutorGPU_0;
    std::shared_ptr<ImmediateExecutor> mockExecutorGPU_1;
    std::shared_ptr<ImmediateExecutor> mockExecutorVPUX;
    size_t optimalNum;

public:
    // Builds a readable test name from the parameter tuple, e.g.
    // "auto_runtime_fallback_GPU.0_true_CPU_false_enableRuntimeFallback".
    static std::string getTestCaseName(testing::TestParamInfo<ConfigParams> obj) {
        std::vector<std::tuple<std::string, bool>> targetDevices;
        int loadNetworkNum;
        bool enableRuntimeFallback;  // fixed spelling of local (was "enableRumtimeFallback")
        bool expectThrow;
        bool loadNetworkFail;
        bool generateWorkersFail;
        std::tie(targetDevices, loadNetworkNum, enableRuntimeFallback, expectThrow, loadNetworkFail, generateWorkersFail) = obj.param;
        std::ostringstream result;
        result << "auto_runtime_fallback_";
        for (auto deviceInfo : targetDevices) {
            std::string deviceName;
            bool ifThrow;
            std::tie(deviceName, ifThrow) = deviceInfo;
            result << deviceName << "_";
            if (ifThrow)
                result << "true_";
            else
                result << "false_";
        }
        if (enableRuntimeFallback)
            result << "enableRuntimeFallback";
        else
            result << "disableRuntimeFallback";
        if (loadNetworkFail)
            result << "loadNetworkFail";
        if (generateWorkersFail)
            result << "generateWorkersFail";
        return result.str();
    }

    // Releases every mock exactly once (the original reset the four
    // mockIExeNet* pointers twice and never released mockExeNetworkVPUX).
    void TearDown() override {
        core.reset();
        plugin.reset();
        mockExeNetwork = {};
        mockExeNetworkGPU_0 = {};
        mockExeNetworkGPU_1 = {};
        mockExeNetworkVPUX = {};
        config.clear();
        metaDevices.clear();
        inferReqInternal.reset();
        inferReqInternalGPU_0.reset();
        inferReqInternalGPU_1.reset();
        inferReqInternalVPUX.reset();
        mockIExeNet.reset();
        mockIExeNetGPU_0.reset();
        mockIExeNetGPU_1.reset();
        mockIExeNetVPUX.reset();
        mockExecutor.reset();
        mockExecutorGPU_0.reset();
        mockExecutorGPU_1.reset();
        mockExecutorVPUX.reset();
    }

    // Prepares mock executable networks / infer requests for CPU, GPU.0,
    // GPU.1 and VPUX, and wires default mock behavior for core and plugin.
    void SetUp() override {
        // prepare mockExeNetwork
        mockIExeNet = std::make_shared<NiceMock<MockIExecutableNetworkInternal>>();
        mockExeNetwork = {mockIExeNet, {}};
        mockIExeNetGPU_0 = std::make_shared<NiceMock<MockIExecutableNetworkInternal>>();
        mockExeNetworkGPU_0 = {mockIExeNetGPU_0, {}};
        mockIExeNetGPU_1 = std::make_shared<NiceMock<MockIExecutableNetworkInternal>>();
        mockExeNetworkGPU_1 = {mockIExeNetGPU_1, {}};
        mockIExeNetVPUX = std::make_shared<NiceMock<MockIExecutableNetworkInternal>>();
        mockExeNetworkVPUX = {mockIExeNetVPUX, {}};
        // prepare mockicore and cnnNetwork for loading
        core = std::make_shared<NiceMock<MockICore>>();
        NiceMock<MockMultiDeviceInferencePlugin>* mock_multi = new NiceMock<MockMultiDeviceInferencePlugin>();
        plugin.reset(mock_multi);
        function = ngraph::builder::subgraph::makeConvPoolRelu();
        cnnNet = InferenceEngine::CNNNetwork(function);
        plugin->SetCore(core);
        IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, supportConfigs, {});
        // NOTE: this stub is superseded by the configKeys ON_CALL below
        // (gmock gives precedence to the most recently registered ON_CALL).
        ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)).WillByDefault(Return(supportConfigs));
        ON_CALL(*core, GetConfig(_, StrEq(ov::compilation_num_threads.name()))).WillByDefault(Return(12));
        std::vector<std::string> availableDevs = {"CPU", "GPU.0", "GPU.1", "VPUX"};
        ON_CALL(*core, GetAvailableDevices()).WillByDefault(Return(availableDevs));
        std::vector<std::string> metrics = {METRIC_KEY(SUPPORTED_CONFIG_KEYS)};
        ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_METRICS)), _)).WillByDefault(Return(metrics));
        std::vector<std::string> configKeys = {"SUPPORTED_CONFIG_KEYS", "NUM_STREAMS"};
        ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)).WillByDefault(Return(configKeys));
        // GPU/VPUX loads are slowed down so the CPU helper wins the first-load race
        ON_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                    ::testing::Matcher<const std::string&>(StrEq("GPU.0")),
                    ::testing::Matcher<const Config&>(_))).WillByDefault(InvokeWithoutArgs([this]() {
                        std::this_thread::sleep_for(std::chrono::milliseconds(200));
                        return mockExeNetworkGPU_0; }));
        ON_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                    ::testing::Matcher<const std::string&>(StrEq("GPU.1")),
                    ::testing::Matcher<const Config&>(_))).WillByDefault(InvokeWithoutArgs([this]() {
                        std::this_thread::sleep_for(std::chrono::milliseconds(200));
                        return mockExeNetworkGPU_1; }));
        ON_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                    ::testing::Matcher<const std::string&>(StrEq(CommonTestUtils::DEVICE_KEEMBAY)),
                    ::testing::Matcher<const Config&>(_))).WillByDefault(InvokeWithoutArgs([this]() {
                        std::this_thread::sleep_for(std::chrono::milliseconds(200));
                        return mockExeNetworkVPUX; }));
        ON_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                    ::testing::Matcher<const std::string&>(StrEq(CommonTestUtils::DEVICE_CPU)),
                    ::testing::Matcher<const Config&>(_))).WillByDefault(Return(mockExeNetwork));
        // delegate device parsing/selection to the real plugin implementations
        ON_CALL(*plugin, ParseMetaDevices)
            .WillByDefault(
                [this](const std::string& priorityDevices, const std::map<std::string, std::string>& config) {
                    return plugin->MultiDeviceInferencePlugin::ParseMetaDevices(priorityDevices, config);
                });
        // single registration (the original registered an identical
        // ON_CALL(*plugin, SelectDevice) stub twice)
        ON_CALL(*plugin, SelectDevice)
            .WillByDefault([this](const std::vector<DeviceInformation>& metaDevices,
                                  const std::string& netPrecision,
                                  unsigned int priority) {
                return plugin->MultiDeviceInferencePlugin::SelectDevice(metaDevices, netPrecision, priority);
            });
        ON_CALL(*plugin, GetValidDevice)
            .WillByDefault([this](const std::vector<DeviceInformation>& metaDevices, const std::string& netPrecision) {
                std::list<DeviceInformation> devices(metaDevices.begin(), metaDevices.end());
                return devices;
            });
        ON_CALL(*plugin, GetDeviceList).WillByDefault([this](const std::map<std::string, std::string>& config) {
            return plugin->MultiDeviceInferencePlugin::GetDeviceList(config);
        });
        // every mock network reports one optimal infer request
        inferReqInternal = std::make_shared<NiceMock<MockIInferRequestInternal>>();
        mockExecutor = std::make_shared<ImmediateExecutor>();
        IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, optimalNum, 1);
        ON_CALL(*mockIExeNet.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
            .WillByDefault(Return(optimalNum));
        inferReqInternalGPU_0 = std::make_shared<NiceMock<MockIInferRequestInternal>>();
        mockExecutorGPU_0 = std::make_shared<ImmediateExecutor>();
        ON_CALL(*mockIExeNetGPU_0.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
            .WillByDefault(Return(optimalNum));
        inferReqInternalGPU_1 = std::make_shared<NiceMock<MockIInferRequestInternal>>();
        mockExecutorGPU_1 = std::make_shared<ImmediateExecutor>();
        ON_CALL(*mockIExeNetGPU_1.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
            .WillByDefault(Return(optimalNum));
        inferReqInternalVPUX = std::make_shared<NiceMock<MockIInferRequestInternal>>();
        mockExecutorVPUX = std::make_shared<ImmediateExecutor>();
        ON_CALL(*mockIExeNetVPUX.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
            .WillByDefault(Return(optimalNum));
    }
};
TEST_P(AutoRuntimeFallback, releaseResource) {
    // Test parameters (see ConfigParams):
    //   targetDevices         - list of (deviceName, ifThrow); ifThrow makes that device's
    //                           mocked infer request throw during inference
    //   loadNetworkNum        - expected total number of LoadNetwork calls (initial loads
    //                           plus any runtime-fallback reloads)
    //   enableRuntimeFallback - when false, ENABLE_RUNTIME_FALLBACK is explicitly set to "NO"
    //   expectThrow           - whether Infer() is expected to propagate an exception
    //   loadNetworkFail       - force LoadNetwork to fail for "GPU.1"
    //   generateWorkersFail   - force CreateInferRequest to fail for "GPU.1"
    std::vector<std::tuple<std::string, bool>> targetDevices;
    int loadNetworkNum;
    bool enableRuntimeFallback;
    bool expectThrow;
    bool loadNetworkFail;
    bool generateWorkersFail;
    std::tie(targetDevices, loadNetworkNum, enableRuntimeFallback, expectThrow, loadNetworkFail, generateWorkersFail) =
        this->GetParam();
    if (loadNetworkFail) {
        ON_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                    ::testing::Matcher<const std::string&>(StrEq("GPU.1")),
                    ::testing::Matcher<const Config&>(_))).WillByDefault(Throw(InferenceEngine::GeneralError{""}));
    }
    // Build the comma-separated device priority string and install a mock infer
    // request for each device. Index-based separator insertion stays correct even
    // if a device name appears more than once in the list (comparing against
    // targetDevices.back() by value would not).
    std::string targetDev;
    for (size_t i = 0; i < targetDevices.size(); ++i) {
        const std::string& deviceName = std::get<0>(targetDevices[i]);
        const bool ifThrow = std::get<1>(targetDevices[i]);
        if (i > 0)
            targetDev += ",";
        targetDev += deviceName;
        if (deviceName == "CPU") {
            mockInferrequest = std::make_shared<mockAsyncInferRequest>(
                inferReqInternal, mockExecutor, nullptr, ifThrow);
            ON_CALL(*mockIExeNet.get(), CreateInferRequest()).WillByDefault(Return(mockInferrequest));
        } else if (deviceName == "GPU.0") {
            mockInferrequestGPU_0 = std::make_shared<mockAsyncInferRequest>(
                inferReqInternalGPU_0, mockExecutorGPU_0, nullptr, ifThrow);
            ON_CALL(*mockIExeNetGPU_0.get(), CreateInferRequest()).WillByDefault(InvokeWithoutArgs([this]() {
                std::this_thread::sleep_for(std::chrono::milliseconds(0));
                return mockInferrequestGPU_0; }));
        } else if (deviceName == "GPU.1") {
            if (generateWorkersFail) {
                mockInferrequestGPU_1 = std::make_shared<mockAsyncInferRequest>(
                    inferReqInternalGPU_1, mockExecutorGPU_1, nullptr, ifThrow);
                ON_CALL(*mockIExeNetGPU_1.get(), CreateInferRequest()).WillByDefault(Throw(InferenceEngine::GeneralError{""}));
            } else {
                mockInferrequestGPU_1 = std::make_shared<mockAsyncInferRequest>(
                    inferReqInternalGPU_1, mockExecutorGPU_1, nullptr, ifThrow);
                ON_CALL(*mockIExeNetGPU_1.get(), CreateInferRequest()).WillByDefault(InvokeWithoutArgs([this]() {
                    std::this_thread::sleep_for(std::chrono::milliseconds(0));
                    return mockInferrequestGPU_1; }));
            }
        } else if (deviceName == "VPUX") {
            mockInferrequestVPUX = std::make_shared<mockAsyncInferRequest>(
                inferReqInternalVPUX, mockExecutorVPUX, nullptr, ifThrow);
            ON_CALL(*mockIExeNetVPUX.get(), CreateInferRequest()).WillByDefault(InvokeWithoutArgs([this]() {
                std::this_thread::sleep_for(std::chrono::milliseconds(0));
                return mockInferrequestVPUX; }));
        } else {
            // Previously an unknown device name returned early and the test passed
            // silently; fail loudly instead so bad ConfigParams are caught.
            FAIL() << "Unsupported device in test config: " << deviceName;
        }
    }
    plugin->SetName("AUTO");
    config.insert({InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, targetDev});
    if (!enableRuntimeFallback) {
        config.insert({{"ENABLE_RUNTIME_FALLBACK", "NO"}});
    }
    EXPECT_CALL(*core,
                LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                            ::testing::Matcher<const std::string&>(_),
                            ::testing::Matcher<const std::map<std::string, std::string>&>(_)))
        .Times(loadNetworkNum);
    std::shared_ptr<InferenceEngine::IExecutableNetworkInternal> exeNetwork;
    std::shared_ptr<IInferRequestInternal> infer_request;
    ASSERT_NO_THROW(exeNetwork = plugin->LoadExeNetworkImpl(cnnNet, config));
    ASSERT_NO_THROW(infer_request = exeNetwork->CreateInferRequest());
    if (expectThrow) {
        EXPECT_THROW(infer_request->Infer(), IE::Exception);
    } else {
        ASSERT_NO_THROW(infer_request->Infer());
    }
}
// Each ConfigParams entry is:
//   { targetDevices, loadNetworkNum, enableRuntimeFallback, expectThrow, loadNetworkFail, generateWorkersFail }
// where targetDevices is a list of (deviceName, inferThrows) pairs and
// loadNetworkNum is the expected total number of LoadNetwork calls
// (initial loads plus runtime-fallback reloads).
const std::vector<ConfigParams> testConfigs = {
    ConfigParams{{{"GPU.0", true}, {"GPU.1", true}}, 2, true, true, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", false}}, 2, true, false, false, false},
    ConfigParams{{{"GPU.0", false}, {"GPU.1", true}}, 1, true, false, false, false},
    ConfigParams{{{"GPU.0", false}, {"GPU.1", false}}, 1, true, false, false, false},
    //CPU_HELP does not throw
    ConfigParams{{{"GPU.0", false}, {"CPU", false}}, 2, true, false, false, false},
    ConfigParams{{{"GPU.0", true}, {"CPU", false}}, 2, true, false, false, false},
    //CPU_HELP throw
    ConfigParams{{{"GPU.0", false}, {"CPU", true}}, 2, true, false, false, false},
    ConfigParams{{{"GPU.0", true}, {"CPU", true}}, 2, true, true, false, false},
    // 3 devices
    ConfigParams{{{"GPU.0", false}, {"GPU.1", false}, {"VPUX", false}}, 1, true, false, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"VPUX", false}}, 2, true, false, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"VPUX", false}}, 3, true, false, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"VPUX", true}}, 3, true, true, false, false},
    //CPU_HELP does not throw
    ConfigParams{{{"GPU.0", false}, {"GPU.1", false}, {"CPU", false}}, 2, true, false, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"CPU", false}}, 2, true, false, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"CPU", false}}, 2, true, false, false, false},
    //CPU_HELP throw
    ConfigParams{{{"GPU.0", false}, {"GPU.1", false}, {"CPU", true}}, 2, true, false, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"CPU", true}}, 3, true, false, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"CPU", true}}, 3, true, true, false, false},
    // disable RuntimeFallback
    ConfigParams{{{"GPU.0", true}, {"GPU.1", true}}, 1, false, true, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", false}}, 1, false, true, false, false},
    ConfigParams{{{"GPU.0", false}, {"GPU.1", true}}, 1, false, false, false, false},
    ConfigParams{{{"GPU.0", false}, {"GPU.1", false}}, 1, false, false, false, false},
    //CPU_HELP does not throw
    ConfigParams{{{"GPU.0", false}, {"CPU", false}}, 2, false, false, false, false},
    ConfigParams{{{"GPU.0", true}, {"CPU", false}}, 2, false, false, false, false},
    //CPU_HELP throw
    ConfigParams{{{"GPU.0", false}, {"CPU", true}}, 2, false, true, false, false},
    ConfigParams{{{"GPU.0", true}, {"CPU", true}}, 2, false, true, false, false},
    // 3 devices
    ConfigParams{{{"GPU.0", false}, {"GPU.1", false}, {"VPUX", false}}, 1, false, false, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"VPUX", false}}, 1, false, true, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"VPUX", false}}, 1, false, true, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"VPUX", true}}, 1, false, true, false, false},
    //CPU_HELP does not throw
    ConfigParams{{{"GPU.0", false}, {"GPU.1", false}, {"CPU", false}}, 2, false, false, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"CPU", false}}, 2, false, false, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"CPU", false}}, 2, false, false, false, false},
    //CPU_HELP throw
    ConfigParams{{{"GPU.0", false}, {"GPU.1", false}, {"CPU", true}}, 2, false, true, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"CPU", true}}, 2, false, true, false, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"CPU", true}}, 2, false, true, false, false},
    // loadFail and CreateInferRequestFail
    ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"VPUX", false}}, 3, true, false, true, false},
    ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"VPUX", false}}, 3, true, false, false, true},
};
// Instantiate the releaseResource test over every runtime-fallback configuration.
INSTANTIATE_TEST_SUITE_P(smoke_AutoRuntimeFallback, AutoRuntimeFallback,
                ::testing::ValuesIn(testConfigs),
           AutoRuntimeFallback::getTestCaseName);

View File

@@ -0,0 +1,40 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <gmock/gmock.h>
#include "ie_icore.hpp"
#include "plugin.hpp"
#include <iostream>
using namespace MockMultiDevicePlugin;
namespace MockMultiDevice {
// Async infer request mock whose single pipeline stage can be configured to
// throw, simulating an inference failure on a particular device so the AUTO
// plugin's runtime-fallback path can be exercised.
class mockAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
public:
    using Parent = InferenceEngine::AsyncInferRequestThreadSafeDefault;
    // inferRequest     - synchronous request implementation to wrap
    // taskExecutor     - executor that runs the pipeline stage
    // callbackExecutor - executor forwarded to the base class for callbacks
    // ifThrow          - when true, the pipeline stage throws on execution
    mockAsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr &inferRequest,
                          const ImmediateExecutor::Ptr& taskExecutor,
                          const ImmediateExecutor::Ptr& callbackExecutor,
                          bool ifThrow);
    ~mockAsyncInferRequest() override = default;
private:
    bool _throw;  // whether the pipeline stage throws
};
// Wraps the given synchronous request and replaces the default pipeline with a
// single stage that either succeeds immediately or throws, per ifThrow.
// Marked inline: this out-of-line definition lives in a header (#pragma once),
// so without inline it would violate the ODR when included by multiple TUs.
inline mockAsyncInferRequest::mockAsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr &inferRequest,
    const ImmediateExecutor::Ptr& taskExecutor,
    const ImmediateExecutor::Ptr& callbackExecutor,
    bool ifThrow)
    : InferenceEngine::AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, callbackExecutor), _throw(ifThrow) {
    // Discard the base class's default pipeline and install the single mock stage.
    _pipeline = {};
    _pipeline.push_back({taskExecutor,
                [this] {
                    if (_throw)
                        IE_THROW();
                } });
}
} // namespace MockMultiDevice