Runtime fallback to other devices (#16015)
* Runtime fallback to other devices * Update properties.hpp * Update infer callback in AUTO * Avoid some hang cases * Add test cases for AUTO runtime fallback * Replace mockExecutor with ImmediateExecutor * Update the runtime fallback logic * Update test case and support the case thar infer failed on CPU_HELP * Update the test to detect whether to throw exception * fix the error of CTPUT * Add lock to AUTO executable network GetContext * Update variable name in selectOtherDevice API * Simplify variables and add testcase to improve test coverage * Fix the issues when release CPU_HELP device and clean up the code * Clean up code
This commit is contained in:
@@ -19,9 +19,14 @@ namespace intel_auto {
|
||||
static constexpr Property<bool> device_bind_buffer{"DEVICE_BIND_BUFFER"};
|
||||
|
||||
/**
|
||||
* @brief auto/multi device setting that enable/disable CPU as acceleration (or helper device) at the beginning
|
||||
* @brief auto device setting that enable/disable CPU as acceleration (or helper device) at the beginning
|
||||
*/
|
||||
static constexpr Property<bool> enable_startup_fallback{"ENABLE_STARTUP_FALLBACK"};
|
||||
|
||||
/**
|
||||
* @brief auto device setting that enable/disable runtime fallback to other devices when infer fails on current
|
||||
* selected device
|
||||
*/
|
||||
static constexpr Property<bool> enable_runtime_fallback{"ENABLE_RUNTIME_FALLBACK"};
|
||||
} // namespace intel_auto
|
||||
} // namespace ov
|
||||
@@ -17,8 +17,13 @@ AutoExecutableNetwork::AutoExecutableNetwork(AutoScheduleContext::Ptr& context,
|
||||
}
|
||||
|
||||
std::shared_ptr<IE::RemoteContext> AutoExecutableNetwork::GetContext() const {
|
||||
_autoSchedule->WaitActualNetworkReady();
|
||||
return _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetContext();
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
|
||||
if (_autoSchedule->_loadContext[FALLBACKDEVICE].isAlready) {
|
||||
return _autoSchedule->_loadContext[FALLBACKDEVICE].executableNetwork->GetContext();
|
||||
} else {
|
||||
_autoSchedule->WaitActualNetworkReady();
|
||||
return _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetContext();
|
||||
}
|
||||
}
|
||||
|
||||
void AutoExecutableNetwork::SetConfig(const std::map<std::string, IE::Parameter>
|
||||
|
||||
@@ -79,25 +79,114 @@ void AutoSchedule::GenerateWorkers(const std::string& device,
|
||||
IdleGuard<NotBusyPriorityWorkerRequests> idleGuard{workerRequestPtr, *idleWorkerRequestsPtr};
|
||||
workerRequestPtr->_exceptionPtr = exceptionPtr;
|
||||
{
|
||||
auto capturedTask = std::move(workerRequestPtr->_task);
|
||||
capturedTask();
|
||||
}
|
||||
// try to return the request to the idle list (fails if the overall object destruction has began)
|
||||
if (idleGuard.Release()->try_push(std::make_pair(workerRequestPtr->_index, workerRequestPtr))) {
|
||||
// let's try to pop a task, as we know there is at least one idle request, schedule if succeeded
|
||||
// if no device-agnostic tasks, let's try pop the device specific task, schedule if succeeded
|
||||
IE::Task t;
|
||||
do {
|
||||
_inferPipelineTasks.try_pop(t);
|
||||
} while (t && ScheduleToWorkerInferRequest(std::move(t)));
|
||||
do {
|
||||
_inferPipelineTasksDeviceSpecific[device]->try_pop(t);
|
||||
} while (t && ScheduleToWorkerInferRequest(std::move(t), device));
|
||||
auto stopRetryAndContinue = [workerRequestPtr]() {
|
||||
auto capturedTask = std::move(workerRequestPtr->_task);
|
||||
capturedTask();
|
||||
};
|
||||
// will fallback to other devices if enable _runtimeFallback
|
||||
if (workerRequestPtr->_exceptionPtr != nullptr && _autoSContext->_runtimeFallback) {
|
||||
bool selectOtherDeviceFlag = false;
|
||||
// select other device
|
||||
try {
|
||||
selectOtherDeviceFlag = selectOtherDevice(device);
|
||||
} catch (const IE::Exception& iie) {
|
||||
LOG_DEBUG_TAG("select other devices with error: %s", iie.what());
|
||||
selectOtherDeviceFlag = false;
|
||||
}
|
||||
if (selectOtherDeviceFlag) {
|
||||
// Add end time to current workerRequest and restart the task in pipeline
|
||||
workerRequestPtr->_endTimes.push_back(std::chrono::steady_clock::now());
|
||||
workerRequestPtr->_fallbackExec->_task();
|
||||
} else {
|
||||
// continue to run the task in pipeline
|
||||
stopRetryAndContinue();
|
||||
}
|
||||
} else {
|
||||
stopRetryAndContinue();
|
||||
}
|
||||
// try to return the request to the idle list (fails if the overall object destruction has began)
|
||||
if (idleGuard.Release()->try_push(std::make_pair(workerRequestPtr->_index, workerRequestPtr))) {
|
||||
// let's try to pop a task, as we know there is at least one idle request, schedule if succeeded
|
||||
// if no device-agnostic tasks, let's try pop the device specific task, schedule if succeeded
|
||||
IE::Task t;
|
||||
do {
|
||||
_inferPipelineTasks.try_pop(t);
|
||||
} while (t && ScheduleToWorkerInferRequest(std::move(t)));
|
||||
do {
|
||||
_inferPipelineTasksDeviceSpecific[device]->try_pop(t);
|
||||
} while (t && ScheduleToWorkerInferRequest(std::move(t), device));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
bool AutoSchedule::selectOtherDevice(const std::string& currentDeviceName) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
|
||||
// a recursive function to select other devices
|
||||
std::function<bool(std::string)> getExecutionDevices;
|
||||
getExecutionDevices = [&](const std::string& deviceName) {
|
||||
std::string realDeviceName;
|
||||
bool isCPUHelp = false;
|
||||
if (_autoSContext->_modelPath.empty())
|
||||
_loadContext[FALLBACKDEVICE].networkPrecision = GetNetworkPrecision(_autoSContext->_network);
|
||||
if (deviceName == "CPU_HELP") {
|
||||
// if infer failed in CPU_HELP, we will remove CPU from _devicePriorities
|
||||
// and re-run infer request when _loadContext[ACTUALDEVICE] is ready
|
||||
realDeviceName = "CPU";
|
||||
isCPUHelp = true;
|
||||
WaitActualNetworkReady();
|
||||
} else {
|
||||
realDeviceName = deviceName;
|
||||
}
|
||||
const auto CurrentDeviceIter = std::find_if(_autoSContext->_devicePriorities.begin(), _autoSContext->_devicePriorities.end(),
|
||||
[=](const DeviceInformation& d) -> bool {
|
||||
return d.deviceName.find(realDeviceName) != std::string::npos;});
|
||||
if (CurrentDeviceIter != _autoSContext->_devicePriorities.end()) {
|
||||
if (_autoSContext->_devicePriorities.size() == 1) {
|
||||
LOG_INFO_TAG("No other devices in _devicePriorities");
|
||||
return false;
|
||||
}
|
||||
_autoSContext->_devicePriorities.erase(CurrentDeviceIter);
|
||||
if (isCPUHelp) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
LOG_DEBUG_TAG("Already selected the fallback device");
|
||||
return _loadContext[FALLBACKDEVICE].isReloadSuccess ? true : false;
|
||||
}
|
||||
_loadContext[FALLBACKDEVICE].metaDevices = _autoSContext->_devicePriorities;
|
||||
_loadContext[FALLBACKDEVICE].isLoadSuccess = false;
|
||||
_loadContext[FALLBACKDEVICE].workName = "";
|
||||
_loadContext[FALLBACKDEVICE].isReloadSuccess = false;
|
||||
_loadContext[FALLBACKDEVICE].deviceInfo =
|
||||
_autoSContext->_plugin->SelectDevice(_autoSContext->_devicePriorities,
|
||||
_loadContext[FALLBACKDEVICE].networkPrecision,
|
||||
_autoSContext->_modelPriority);
|
||||
try {
|
||||
_loadContext[FALLBACKDEVICE].task();
|
||||
// FALLBACKDEVICE need to be load again if infer failed, so reset promise here
|
||||
_loadContext[FALLBACKDEVICE].promise = {};
|
||||
_loadContext[FALLBACKDEVICE].future = _loadContext[FALLBACKDEVICE].promise.get_future();
|
||||
} catch (const IE::Exception& iie) {
|
||||
LOG_DEBUG_TAG("Load context in FALLBACKDEVICE with error: %s", iie.what());
|
||||
}
|
||||
if (_loadContext[FALLBACKDEVICE].isReloadSuccess) {
|
||||
_loadContext[ACTUALDEVICE].isEnabled = false;
|
||||
_loadContext[ACTUALDEVICE].isLoadSuccess = false;
|
||||
_loadContext[ACTUALDEVICE].isAlready = false;
|
||||
LOG_INFO_TAG("Select fallback device:%s", _loadContext[FALLBACKDEVICE].deviceInfo.deviceName.c_str());
|
||||
return true;
|
||||
} else {
|
||||
// load failed or generate works failed, so reselect other devices
|
||||
return getExecutionDevices(_loadContext[FALLBACKDEVICE].deviceInfo.deviceName.c_str());
|
||||
}
|
||||
};
|
||||
return getExecutionDevices(currentDeviceName);
|
||||
}
|
||||
}
|
||||
|
||||
void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
|
||||
_LogTag = sContext->_LogTag;
|
||||
LOG_INFO_TAG("ExecutableNetwork start");
|
||||
@@ -116,6 +205,9 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
|
||||
// loadContext[ACTUALDEVICE] is always enabled,
|
||||
// when there is CPU and there are more than two devices, loadContext[CPU] is enabled
|
||||
_loadContext[ACTUALDEVICE].isEnabled = true;
|
||||
if (_autoSContext->_runtimeFallback) {
|
||||
_loadContext[FALLBACKDEVICE].isEnabled = true;
|
||||
}
|
||||
if (_autoSContext->_modelPath.empty())
|
||||
_loadContext[ACTUALDEVICE].networkPrecision = GetNetworkPrecision(_autoSContext->_network);
|
||||
_loadContext[ACTUALDEVICE].metaDevices = _autoSContext->_devicePriorities;
|
||||
@@ -179,7 +271,7 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
|
||||
// initialize the rest members of load context
|
||||
for (int i = 0; i < CONTEXTNUM; i++) {
|
||||
if (_loadContext[i].isEnabled) {
|
||||
_loadContext[i].future = _loadContext[i].promise.get_future();
|
||||
_loadContext[i].future = _loadContext[i].promise.get_future();
|
||||
auto* contextPtr = &_loadContext[i];
|
||||
auto modelPath = _autoSContext->_modelPath;
|
||||
auto network = _autoSContext->_network;
|
||||
@@ -197,12 +289,14 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
|
||||
_autoSContext->_config.insert(contextPtr->deviceInfo.config.begin(), contextPtr->deviceInfo.config.end());
|
||||
}
|
||||
contextPtr->isAlready = true;
|
||||
// reloadsuccess flag only for _loadContext[FALLBACKDEVICE]
|
||||
contextPtr->isReloadSuccess = true;
|
||||
auto& deviceName = contextPtr->deviceInfo.deviceName;
|
||||
LOG_INFO_TAG("device:%s loading Network finished", deviceName.c_str());
|
||||
if (!isCumulative) {
|
||||
auto supported_config_keys =
|
||||
_autoSContext->_core->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS))
|
||||
.as<std::vector<std::string>>();
|
||||
.as<std::vector<std::string>>();
|
||||
DEBUG_RUN([this, &contextPtr, &deviceName, &supported_config_keys] {
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
|
||||
for (const auto& cfg : supported_config_keys) {
|
||||
@@ -256,8 +350,14 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
|
||||
_loadContext[CPU].future.wait();
|
||||
// clean up helper infer requests
|
||||
// first, wait for all the remaining requests to finish
|
||||
for (auto& iter : _workerRequests["CPU_HELP"]) {
|
||||
iter._inferRequest._ptr->Wait(IE::InferRequest::WaitMode::RESULT_READY);
|
||||
if (!_autoSContext->_runtimeFallback) {
|
||||
for (auto& iter : _workerRequests["CPU_HELP"]) {
|
||||
try {
|
||||
iter._inferRequest._ptr->Wait(IE::InferRequest::WaitMode::RESULT_READY);
|
||||
} catch (const IE::Exception& iie) {
|
||||
LOG_DEBUG_TAG("No infer results expected, infer in CPU_HELP throw some errors: %s", iie.what());
|
||||
}
|
||||
}
|
||||
}
|
||||
// late enough to check the idle queue now
|
||||
// second, check the idle queue if all requests are in place
|
||||
@@ -301,6 +401,15 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
|
||||
}
|
||||
};
|
||||
_executor->run(std::move(recycleTask));
|
||||
} else if (_autoSContext->_devicePriorities.size() != 1 && !isCumulative && _autoSContext->_runtimeFallback) {
|
||||
// The performance will has some drop then _passthroughExeNet when enable ENABLE_RUNTIME_FALLBACK
|
||||
for (auto&& device : _autoSContext->_devicePriorities) {
|
||||
// initialize containers before run async task
|
||||
_idleWorkerRequests[device.deviceName];
|
||||
_workerRequests[device.deviceName];
|
||||
_inferPipelineTasksDeviceSpecific[device.deviceName] = nullptr;
|
||||
}
|
||||
_loadContext[ACTUALDEVICE].task();
|
||||
} else {
|
||||
// only one device need to load network, do not need to load it async
|
||||
_loadContext[ACTUALDEVICE].task();
|
||||
@@ -412,13 +521,13 @@ void AutoSchedule::WaitFirstNetworkReady() {
|
||||
_firstLoadFuture.wait();
|
||||
}
|
||||
// check if there is any device that have loaded network successfully
|
||||
for (int i = CONTEXTNUM - 1; i >= 0; i--) {
|
||||
for (int i = CONTEXTNUM - 2; i >= 0; i--) {
|
||||
if (_loadContext[i].isEnabled && _loadContext[i].isAlready) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
// the first loading is failed, wait for another loading
|
||||
for (int i = CONTEXTNUM - 1; i >= 0; i--) {
|
||||
for (int i = CONTEXTNUM - 2; i >= 0; i--) {
|
||||
if (_loadContext[i].isEnabled) {
|
||||
_loadContext[i].future.wait();
|
||||
// check if loading is successful
|
||||
@@ -428,7 +537,7 @@ void AutoSchedule::WaitFirstNetworkReady() {
|
||||
}
|
||||
}
|
||||
//print errMessage
|
||||
for (int i = CONTEXTNUM - 1; i >= 0; i--) {
|
||||
for (int i = CONTEXTNUM - 2; i >= 0; i--) {
|
||||
if (_loadContext[i].isEnabled) {
|
||||
LOG_ERROR_TAG("load failed, %s", _loadContext[i].errMessage.c_str());
|
||||
}
|
||||
@@ -460,14 +569,18 @@ bool AutoSchedule::ScheduleToWorkerInferRequest(IE::Task inferPipelineTask, Devi
|
||||
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
|
||||
} else {
|
||||
// _acceleratorDevice could be the same as _cpuDevice, such as AUTO:CPU
|
||||
if (_loadContext[ACTUALDEVICE].isAlready) {
|
||||
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
|
||||
if (_loadContext[FALLBACKDEVICE].isAlready) {
|
||||
devices.push_back(_loadContext[FALLBACKDEVICE].deviceInfo);
|
||||
} else {
|
||||
// replace deviceName with workName, so schedule can select correct
|
||||
// idleWorkerQueue
|
||||
auto deviceInfo = _loadContext[CPU].deviceInfo;
|
||||
deviceInfo.deviceName = _loadContext[CPU].workName;
|
||||
devices.push_back(std::move(deviceInfo));
|
||||
if (_loadContext[ACTUALDEVICE].isAlready) {
|
||||
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
|
||||
} else {
|
||||
// replace deviceName with workName, so schedule can select correct
|
||||
// idleWorkerQueue
|
||||
auto deviceInfo = _loadContext[CPU].deviceInfo;
|
||||
deviceInfo.deviceName = _loadContext[CPU].workName;
|
||||
devices.push_back(std::move(deviceInfo));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto&& device : devices) {
|
||||
|
||||
@@ -18,6 +18,7 @@ struct AutoLoadContext {
|
||||
std::atomic<bool> isEnabled = {false};
|
||||
std::atomic<bool> isAlready = {false};
|
||||
std::atomic<bool> isLoadSuccess = {false};
|
||||
std::atomic<bool> isReloadSuccess = {false};
|
||||
std::future<void> future;
|
||||
std::promise<void> promise;
|
||||
SoExecNetwork executableNetwork;
|
||||
@@ -36,7 +37,8 @@ struct AutoLoadContext {
|
||||
enum AutoLoadContextIndex {
|
||||
CPU = 0,
|
||||
ACTUALDEVICE = 1,
|
||||
CONTEXTNUM = 2
|
||||
FALLBACKDEVICE = 2,
|
||||
CONTEXTNUM = 3
|
||||
};
|
||||
class AutoSchedule : public MultiSchedule {
|
||||
public:
|
||||
@@ -52,12 +54,15 @@ public:
|
||||
protected:
|
||||
void GenerateWorkers(const std::string& device, const SoExecNetwork& executableNetwork) override;
|
||||
bool ScheduleToWorkerInferRequest(IE::Task, DeviceName preferred_device = "") override;
|
||||
static bool RunPipelineTask(IE::Task& inferPipelineTask, NotBusyPriorityWorkerRequests& idleWorkerRequests, const DeviceName& preferred_device);
|
||||
static bool RunPipelineTask(IE::Task& inferPipelineTask, NotBusyPriorityWorkerRequests& idleWorkerRequests,
|
||||
const DeviceName& preferred_device);
|
||||
DeviceMap<NotBusyPriorityWorkerRequests> _idleWorkerRequests;
|
||||
|
||||
private:
|
||||
void WaitFirstNetworkReady();
|
||||
void TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network);
|
||||
bool selectOtherDevice(const std::string& currentDeviceName);
|
||||
IE::Task releaseActualdeviceTask;
|
||||
|
||||
private:
|
||||
IE::IStreamsExecutor::Ptr _executor;
|
||||
|
||||
@@ -42,6 +42,26 @@ using Time = std::chrono::time_point<std::chrono::steady_clock>;
|
||||
|
||||
template<typename T>
|
||||
using DeviceMap = std::unordered_map<DeviceName, T>;
|
||||
|
||||
struct MultiImmediateExecutor : public IE::ITaskExecutor {
|
||||
public:
|
||||
/**
|
||||
* @brief A shared pointer to a ImmediateExecutor object
|
||||
*/
|
||||
using Ptr = std::shared_ptr<MultiImmediateExecutor>;
|
||||
|
||||
/**
|
||||
* @brief Destroys the object.
|
||||
*/
|
||||
~MultiImmediateExecutor() override = default;
|
||||
|
||||
void run(IE::Task task) override {
|
||||
_task = std::move(task);
|
||||
_task();
|
||||
}
|
||||
InferenceEngine::Task _task;
|
||||
};
|
||||
|
||||
struct DeviceInformation {
|
||||
DeviceName deviceName;
|
||||
std::map<std::string, std::string> config;
|
||||
@@ -58,6 +78,7 @@ struct WorkerInferRequest {
|
||||
std::list<Time> _startTimes;
|
||||
std::list<Time> _endTimes;
|
||||
int _index = 0;
|
||||
MultiImmediateExecutor::Ptr _fallbackExec;
|
||||
};
|
||||
|
||||
using NotBusyPriorityWorkerRequests = IE::ThreadSafeBoundedPriorityQueue<std::pair<int, WorkerInferRequest*>>;
|
||||
@@ -124,6 +145,7 @@ public:
|
||||
bool _batchingDisabled = {false};
|
||||
bool _bindBuffer = false;
|
||||
bool _startupfallback = true;
|
||||
bool _runtimeFallback = true;
|
||||
virtual ~MultiScheduleContext() = default;
|
||||
};
|
||||
|
||||
@@ -137,6 +159,7 @@ public:
|
||||
unsigned int _modelPriority = 0;
|
||||
std::string _performanceHint;
|
||||
std::mutex _confMutex;
|
||||
std::mutex _fallbackMutex;
|
||||
MultiDeviceInferencePlugin* _plugin;
|
||||
virtual ~AutoScheduleContext() = default;
|
||||
};
|
||||
|
||||
@@ -54,10 +54,11 @@ Pipeline MultiSchedule::GetPipeline(const IInferPtr& syncInferRequest, WorkerInf
|
||||
}
|
||||
});
|
||||
} else {
|
||||
MultiImmediateExecutor::Ptr _firstExecutor = std::make_shared<MultiImmediateExecutor>();
|
||||
pipeline = {
|
||||
// if the request is coming with device-specific remote blobs make sure it is scheduled to the specific device only:
|
||||
Stage {
|
||||
/*TaskExecutor*/ std::make_shared<IE::ImmediateExecutor>(), /*task*/ [this, &syncInferRequest]() {
|
||||
/*TaskExecutor*/ _firstExecutor, /*task*/ [this, &syncInferRequest]() {
|
||||
// by default, no preferred device:
|
||||
_thisPreferredDeviceName = "";
|
||||
auto execNetwork = _multiSContext->_executableNetwork.lock();
|
||||
@@ -96,13 +97,18 @@ Pipeline MultiSchedule::GetPipeline(const IInferPtr& syncInferRequest, WorkerInf
|
||||
multiSyncInferRequest->SetBlobsToAnotherRequest(_thisWorkerInferRequest->_inferRequest);
|
||||
INFO_RUN([workerInferRequest]() {
|
||||
(*workerInferRequest)->_startTimes.push_back(std::chrono::steady_clock::now());
|
||||
});
|
||||
});
|
||||
}},
|
||||
// final task in the pipeline:
|
||||
Stage {
|
||||
/*TaskExecutor*/std::make_shared<ThisRequestExecutor>(workerInferRequest), /*task*/ [this, &syncInferRequest, workerInferRequest]() {
|
||||
if (nullptr != (*workerInferRequest)->_exceptionPtr) {
|
||||
std::rethrow_exception((*workerInferRequest)->_exceptionPtr);
|
||||
/*TaskExecutor*/std::make_shared<ThisRequestExecutor>(workerInferRequest, _firstExecutor), /*task*/
|
||||
[this, &syncInferRequest, workerInferRequest]() {
|
||||
INFO_RUN([workerInferRequest]() {
|
||||
(*workerInferRequest)->_endTimes.push_back(std::chrono::steady_clock::now());
|
||||
});
|
||||
std::exception_ptr eptr = (*workerInferRequest)->_exceptionPtr;
|
||||
if (nullptr != eptr) {
|
||||
std::rethrow_exception(eptr);
|
||||
}
|
||||
if (_multiSContext->_needPerfCounters) {
|
||||
auto multiSyncInferRequest = std::dynamic_pointer_cast<MultiDeviceInferRequest>
|
||||
@@ -110,9 +116,6 @@ Pipeline MultiSchedule::GetPipeline(const IInferPtr& syncInferRequest, WorkerInf
|
||||
multiSyncInferRequest->_scheduledRequest =
|
||||
(*workerInferRequest)->_inferRequest;
|
||||
}
|
||||
INFO_RUN([workerInferRequest]() {
|
||||
(*workerInferRequest)->_endTimes.push_back(std::chrono::steady_clock::now());
|
||||
});
|
||||
}}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -16,12 +16,14 @@
|
||||
|
||||
namespace MultiDevicePlugin {
|
||||
struct ThisRequestExecutor : public IE::ITaskExecutor {
|
||||
explicit ThisRequestExecutor(WorkerInferRequest** ptr): _workptrptr{ptr} {}
|
||||
explicit ThisRequestExecutor(WorkerInferRequest** ptr, MultiImmediateExecutor::Ptr executor = nullptr): _workptrptr{ptr}, _fallbackExec(executor) {}
|
||||
void run(IE::Task task) override {
|
||||
(*_workptrptr)->_task = std::move(task);
|
||||
(*_workptrptr)->_fallbackExec = _fallbackExec;
|
||||
(*_workptrptr)->_inferRequest->StartAsync();
|
||||
};
|
||||
WorkerInferRequest** _workptrptr = nullptr;
|
||||
MultiImmediateExecutor::Ptr _fallbackExec;
|
||||
};
|
||||
|
||||
class MultiSchedule : public Schedule, public IE::ITaskExecutor {
|
||||
@@ -54,7 +56,6 @@ protected:
|
||||
DeviceMap<std::unique_ptr<IE::ThreadSafeQueue<IE::Task>>> _inferPipelineTasksDeviceSpecific;
|
||||
DeviceMap<NotBusyWorkerRequests> _idleWorkerRequests;
|
||||
DeviceMap<std::vector<WorkerInferRequest>> _workerRequests;
|
||||
mutable std::mutex _mutex;
|
||||
std::atomic_size_t _numRequestsCreated = {0};
|
||||
MultiScheduleContext::Ptr _multiSContext;
|
||||
SoExecNetwork _passthroughExeNet;
|
||||
|
||||
@@ -485,6 +485,7 @@ IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetworkImpl(cons
|
||||
autoSContext->_LogTag = _LogTag;
|
||||
autoSContext->_bindBuffer = loadConfig.get_property(ov::intel_auto::device_bind_buffer);
|
||||
autoSContext->_startupfallback = loadConfig.get_property(ov::intel_auto::enable_startup_fallback);
|
||||
autoSContext->_runtimeFallback = loadConfig.get_property(ov::intel_auto::enable_runtime_fallback);
|
||||
return std::make_shared<AutoExecutableNetwork>(autoSContext, std::make_shared<AutoSchedule>());
|
||||
}
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, "MultiDeviceInferencePlugin::LoadNetworkImpl:MultiMode");
|
||||
|
||||
@@ -26,6 +26,7 @@ void PluginConfig::set_default() {
|
||||
std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::UNDEFINED),
|
||||
std::make_tuple(ov::hint::num_requests, 0, UnsignedTypeValidator()),
|
||||
std::make_tuple(ov::intel_auto::enable_startup_fallback, true),
|
||||
std::make_tuple(ov::intel_auto::enable_runtime_fallback, true),
|
||||
// TODO 1) cache_dir 2) allow_auto_batch 3) auto_batch_timeout
|
||||
std::make_tuple(ov::cache_dir, ""),
|
||||
std::make_tuple(ov::hint::allow_auto_batching, true),
|
||||
|
||||
@@ -159,6 +159,9 @@ public:
|
||||
multi_supported_configKeys.erase(std::remove(
|
||||
multi_supported_configKeys.begin(), multi_supported_configKeys.end(), ov::intel_auto::enable_startup_fallback.name()),
|
||||
multi_supported_configKeys.end());
|
||||
multi_supported_configKeys.erase(std::remove(
|
||||
multi_supported_configKeys.begin(), multi_supported_configKeys.end(), ov::intel_auto::enable_runtime_fallback.name()),
|
||||
multi_supported_configKeys.end());
|
||||
return pluginName == "AUTO" ? supported_configKeys : multi_supported_configKeys;
|
||||
}
|
||||
|
||||
@@ -171,6 +174,9 @@ public:
|
||||
multi_supported_properties.erase(std::remove(
|
||||
multi_supported_properties.begin(), multi_supported_properties.end(), ov::intel_auto::enable_startup_fallback),
|
||||
multi_supported_properties.end());
|
||||
multi_supported_properties.erase(std::remove(
|
||||
multi_supported_properties.begin(), multi_supported_properties.end(), ov::intel_auto::enable_runtime_fallback),
|
||||
multi_supported_properties.end());
|
||||
return pluginName == "AUTO" ? supported_properties : multi_supported_properties;
|
||||
}
|
||||
|
||||
|
||||
364
src/tests/unit/auto/auto_runtime_fallback_test.cpp
Normal file
364
src/tests/unit/auto/auto_runtime_fallback_test.cpp
Normal file
@@ -0,0 +1,364 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <gmock/gmock.h>
|
||||
|
||||
#include <ngraph_functions/subgraph_builders.hpp>
|
||||
#include <common_test_utils/test_constants.hpp>
|
||||
#include <ie_metric_helpers.hpp>
|
||||
#include "mock_common.hpp"
|
||||
|
||||
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp"
|
||||
#include "unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp"
|
||||
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp"
|
||||
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp"
|
||||
#include "plugin/mock_auto_device_plugin.hpp"
|
||||
#include "plugin/mock_infer_request.hpp"
|
||||
|
||||
using ::testing::Throw;
|
||||
using ::testing::Matches;
|
||||
using ::testing::_;
|
||||
using ::testing::StrEq;
|
||||
using ::testing::Return;
|
||||
using ::testing::InvokeWithoutArgs;
|
||||
using ::testing::NiceMock;
|
||||
|
||||
using namespace MockMultiDevice;
|
||||
using Config = std::map<std::string, std::string>;
|
||||
using ConfigParams = std::tuple<std::vector<std::tuple<std::string, bool>>, int, bool, bool, bool, bool>;
|
||||
|
||||
class AutoRuntimeFallback : public ::testing::TestWithParam<ConfigParams> {
|
||||
public:
|
||||
std::shared_ptr<ngraph::Function> function;
|
||||
InferenceEngine::CNNNetwork cnnNet;
|
||||
std::shared_ptr<NiceMock<MockICore>> core;
|
||||
std::shared_ptr<NiceMock<MockMultiDeviceInferencePlugin>> plugin;
|
||||
// config for Auto device
|
||||
std::map<std::string, std::string> config;
|
||||
std::vector<DeviceInformation> metaDevices;
|
||||
//mock exeNetwork helper
|
||||
ov::SoPtr<IExecutableNetworkInternal> mockExeNetwork;
|
||||
ov::SoPtr<IExecutableNetworkInternal> mockExeNetworkGPU_0;
|
||||
ov::SoPtr<IExecutableNetworkInternal> mockExeNetworkGPU_1;
|
||||
ov::SoPtr<IExecutableNetworkInternal> mockExeNetworkVPUX;
|
||||
|
||||
std::shared_ptr<NiceMock<MockIInferRequestInternal>> inferReqInternal;
|
||||
std::shared_ptr<NiceMock<MockIInferRequestInternal>> inferReqInternalGPU_0;
|
||||
std::shared_ptr<NiceMock<MockIInferRequestInternal>> inferReqInternalGPU_1;
|
||||
std::shared_ptr<NiceMock<MockIInferRequestInternal>> inferReqInternalVPUX;
|
||||
|
||||
std::shared_ptr<NiceMock<MockIExecutableNetworkInternal>> mockIExeNet;
|
||||
std::shared_ptr<NiceMock<MockIExecutableNetworkInternal>> mockIExeNetGPU_0;
|
||||
std::shared_ptr<NiceMock<MockIExecutableNetworkInternal>> mockIExeNetGPU_1;
|
||||
std::shared_ptr<NiceMock<MockIExecutableNetworkInternal>> mockIExeNetVPUX;
|
||||
|
||||
std::shared_ptr<mockAsyncInferRequest> mockInferrequest;
|
||||
std::shared_ptr<mockAsyncInferRequest> mockInferrequestGPU_0;
|
||||
std::shared_ptr<mockAsyncInferRequest> mockInferrequestGPU_1;
|
||||
std::shared_ptr<mockAsyncInferRequest> mockInferrequestVPUX;
|
||||
|
||||
std::shared_ptr<ImmediateExecutor> mockExecutor;
|
||||
std::shared_ptr<ImmediateExecutor> mockExecutorGPU_0;
|
||||
std::shared_ptr<ImmediateExecutor> mockExecutorGPU_1;
|
||||
std::shared_ptr<ImmediateExecutor> mockExecutorVPUX;
|
||||
|
||||
size_t optimalNum;
|
||||
|
||||
public:
|
||||
static std::string getTestCaseName(testing::TestParamInfo<ConfigParams> obj) {
|
||||
std::vector<std::tuple<std::string, bool>> targetDevices;
|
||||
int loadNetworkNum;
|
||||
bool enableRumtimeFallback;
|
||||
bool expectThrow;
|
||||
bool loadNetworkFail;
|
||||
bool generateWorkersFail;
|
||||
std::tie(targetDevices, loadNetworkNum, enableRumtimeFallback, expectThrow, loadNetworkFail, generateWorkersFail) = obj.param;
|
||||
std::ostringstream result;
|
||||
result << "auto_runtime_fallback_";
|
||||
for (auto deviceInfo : targetDevices) {
|
||||
std::string deviceName;
|
||||
bool ifThrow;
|
||||
std::tie(deviceName, ifThrow) = deviceInfo;
|
||||
result << deviceName << "_";
|
||||
if (ifThrow)
|
||||
result << "true_";
|
||||
else
|
||||
result << "false_";
|
||||
}
|
||||
if (enableRumtimeFallback)
|
||||
result << "enableRuntimeFallback";
|
||||
else
|
||||
result << "disableRuntimeFallback";
|
||||
if (loadNetworkFail)
|
||||
result << "loadNetworkFail";
|
||||
if (generateWorkersFail)
|
||||
result << "generateWorkersFail";
|
||||
return result.str();
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
core.reset();
|
||||
plugin.reset();
|
||||
mockExeNetwork = {};
|
||||
mockExeNetworkGPU_0 = {};
|
||||
mockExeNetworkGPU_1 = {};
|
||||
config.clear();
|
||||
metaDevices.clear();
|
||||
inferReqInternal.reset();
|
||||
inferReqInternalGPU_0.reset();
|
||||
inferReqInternalGPU_1.reset();
|
||||
inferReqInternalVPUX.reset();
|
||||
mockIExeNet.reset();
|
||||
mockIExeNetGPU_0.reset();
|
||||
mockIExeNetGPU_1.reset();
|
||||
mockIExeNetVPUX.reset();
|
||||
mockIExeNet.reset();
|
||||
mockIExeNetGPU_0.reset();
|
||||
mockIExeNetGPU_1.reset();
|
||||
mockIExeNetVPUX.reset();
|
||||
mockExecutor.reset();
|
||||
mockExecutorGPU_0.reset();
|
||||
mockExecutorGPU_1.reset();
|
||||
mockExecutorVPUX.reset();
|
||||
}
|
||||
|
||||
void SetUp() override {
|
||||
// prepare mockExeNetwork
|
||||
mockIExeNet = std::make_shared<NiceMock<MockIExecutableNetworkInternal>>();
|
||||
mockExeNetwork = {mockIExeNet, {}};
|
||||
|
||||
mockIExeNetGPU_0 = std::make_shared<NiceMock<MockIExecutableNetworkInternal>>();
|
||||
mockExeNetworkGPU_0 = {mockIExeNetGPU_0, {}};
|
||||
|
||||
mockIExeNetGPU_1 = std::make_shared<NiceMock<MockIExecutableNetworkInternal>>();
|
||||
mockExeNetworkGPU_1 = {mockIExeNetGPU_1, {}};
|
||||
|
||||
mockIExeNetVPUX = std::make_shared<NiceMock<MockIExecutableNetworkInternal>>();
|
||||
mockExeNetworkVPUX = {mockIExeNetVPUX, {}};
|
||||
|
||||
// prepare mockicore and cnnNetwork for loading
|
||||
core = std::make_shared<NiceMock<MockICore>>();
|
||||
NiceMock<MockMultiDeviceInferencePlugin>* mock_multi = new NiceMock<MockMultiDeviceInferencePlugin>();
|
||||
plugin.reset(mock_multi);
|
||||
function = ngraph::builder::subgraph::makeConvPoolRelu();
|
||||
cnnNet = InferenceEngine::CNNNetwork(function);
|
||||
plugin->SetCore(core);
|
||||
|
||||
IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, supportConfigs, {});
|
||||
ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)).WillByDefault(Return(supportConfigs));
|
||||
ON_CALL(*core, GetConfig(_, StrEq(ov::compilation_num_threads.name()))).WillByDefault(Return(12));
|
||||
std::vector<std::string> availableDevs = {"CPU", "GPU.0", "GPU.1", "VPUX"};
|
||||
ON_CALL(*core, GetAvailableDevices()).WillByDefault(Return(availableDevs));
|
||||
|
||||
std::vector<std::string> metrics = {METRIC_KEY(SUPPORTED_CONFIG_KEYS)};
|
||||
ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_METRICS)), _)).WillByDefault(Return(metrics));
|
||||
|
||||
std::vector<std::string> configKeys = {"SUPPORTED_CONFIG_KEYS", "NUM_STREAMS"};
|
||||
ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)).WillByDefault(Return(configKeys));
|
||||
|
||||
ON_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
|
||||
::testing::Matcher<const std::string&>(StrEq("GPU.0")),
|
||||
::testing::Matcher<const Config&>(_))).WillByDefault(InvokeWithoutArgs([this]() {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200));
|
||||
return mockExeNetworkGPU_0; }));
|
||||
ON_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
|
||||
::testing::Matcher<const std::string&>(StrEq("GPU.1")),
|
||||
::testing::Matcher<const Config&>(_))).WillByDefault(InvokeWithoutArgs([this]() {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200));
|
||||
return mockExeNetworkGPU_1; }));
|
||||
ON_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
|
||||
::testing::Matcher<const std::string&>(StrEq(CommonTestUtils::DEVICE_KEEMBAY)),
|
||||
::testing::Matcher<const Config&>(_))).WillByDefault(InvokeWithoutArgs([this]() {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200));
|
||||
return mockExeNetworkVPUX; }));
|
||||
|
||||
ON_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
|
||||
::testing::Matcher<const std::string&>(StrEq(CommonTestUtils::DEVICE_CPU)),
|
||||
::testing::Matcher<const Config&>(_))).WillByDefault(Return(mockExeNetwork));
|
||||
|
||||
ON_CALL(*plugin, ParseMetaDevices)
|
||||
.WillByDefault(
|
||||
[this](const std::string& priorityDevices, const std::map<std::string, std::string>& config) {
|
||||
return plugin->MultiDeviceInferencePlugin::ParseMetaDevices(priorityDevices, config);
|
||||
});
|
||||
|
||||
ON_CALL(*plugin, SelectDevice)
|
||||
.WillByDefault([this](const std::vector<DeviceInformation>& metaDevices,
|
||||
const std::string& netPrecision,
|
||||
unsigned int priority) {
|
||||
return plugin->MultiDeviceInferencePlugin::SelectDevice(metaDevices, netPrecision, priority);
|
||||
});
|
||||
|
||||
ON_CALL(*plugin, GetValidDevice)
|
||||
.WillByDefault([this](const std::vector<DeviceInformation>& metaDevices, const std::string& netPrecision) {
|
||||
std::list<DeviceInformation> devices(metaDevices.begin(), metaDevices.end());
|
||||
return devices;
|
||||
});
|
||||
|
||||
ON_CALL(*plugin, GetDeviceList).WillByDefault([this](const std::map<std::string, std::string>& config) {
|
||||
return plugin->MultiDeviceInferencePlugin::GetDeviceList(config);
|
||||
});
|
||||
ON_CALL(*plugin, SelectDevice)
|
||||
.WillByDefault([this](const std::vector<DeviceInformation>& metaDevices,
|
||||
const std::string& netPrecision,
|
||||
unsigned int Priority) {
|
||||
return plugin->MultiDeviceInferencePlugin::SelectDevice(metaDevices, netPrecision, Priority);
|
||||
});
|
||||
|
||||
inferReqInternal = std::make_shared<NiceMock<MockIInferRequestInternal>>();
|
||||
mockExecutor = std::make_shared<ImmediateExecutor>();
|
||||
IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, optimalNum, 1);
|
||||
ON_CALL(*mockIExeNet.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
|
||||
.WillByDefault(Return(optimalNum));
|
||||
|
||||
inferReqInternalGPU_0 = std::make_shared<NiceMock<MockIInferRequestInternal>>();
|
||||
mockExecutorGPU_0 = std::make_shared<ImmediateExecutor>();
|
||||
ON_CALL(*mockIExeNetGPU_0.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
|
||||
.WillByDefault(Return(optimalNum));
|
||||
|
||||
inferReqInternalGPU_1 = std::make_shared<NiceMock<MockIInferRequestInternal>>();
|
||||
mockExecutorGPU_1 = std::make_shared<ImmediateExecutor>();
|
||||
ON_CALL(*mockIExeNetGPU_1.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
|
||||
.WillByDefault(Return(optimalNum));
|
||||
|
||||
inferReqInternalVPUX = std::make_shared<NiceMock<MockIInferRequestInternal>>();
|
||||
mockExecutorVPUX = std::make_shared<ImmediateExecutor>();
|
||||
ON_CALL(*mockIExeNetVPUX.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
|
||||
.WillByDefault(Return(optimalNum));
|
||||
}
|
||||
};
|
||||
|
||||
// Exercises AUTO plugin runtime fallback: each target device is configured to
// either complete inference or throw inside the async pipeline, then the test
// verifies (a) how many LoadNetwork calls the fallback logic issues and
// (b) whether Infer() ultimately propagates an exception to the caller.
//
// Param tuple: {targetDevices (name, inferThrows)}, expected LoadNetwork count,
//              enableRuntimeFallback, expectThrow, loadNetworkFail, generateWorkersFail.
TEST_P(AutoRuntimeFallback, releaseResource) {
    std::string targetDev;
    std::vector<std::tuple<std::string, bool>> targetDevices;
    int loadNetworkNum;
    bool enableRuntimeFallback;  // NOTE: fixed typo in original local name "enableRumtimeFallback"
    bool expectThrow;
    bool loadNetworkFail;
    bool generateWorkersFail;
    std::tie(targetDevices, loadNetworkNum, enableRuntimeFallback, expectThrow, loadNetworkFail, generateWorkersFail) =
        this->GetParam();
    // Optionally make LoadNetwork itself fail on GPU.1 to cover the
    // load-failure branch of the fallback logic.
    if (loadNetworkFail) {
        ON_CALL(*core,
                LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                            ::testing::Matcher<const std::string&>(StrEq("GPU.1")),
                            ::testing::Matcher<const Config&>(_)))
            .WillByDefault(Throw(InferenceEngine::GeneralError{""}));
    }
    for (auto& deviceInfo : targetDevices) {
        std::string deviceName;
        bool ifThrow;
        std::tie(deviceName, ifThrow) = deviceInfo;
        // Build the comma-separated MULTI_DEVICE_PRIORITIES string as we go.
        targetDev += deviceName;
        targetDev += ((deviceInfo == targetDevices.back()) ? "" : ",");
        if (deviceName == "CPU") {
            mockInferrequest =
                std::make_shared<mockAsyncInferRequest>(inferReqInternal, mockExecutor, nullptr, ifThrow);
            ON_CALL(*mockIExeNet.get(), CreateInferRequest()).WillByDefault(Return(mockInferrequest));
        } else if (deviceName == "GPU.0") {
            mockInferrequestGPU_0 =
                std::make_shared<mockAsyncInferRequest>(inferReqInternalGPU_0, mockExecutorGPU_0, nullptr, ifThrow);
            ON_CALL(*mockIExeNetGPU_0.get(), CreateInferRequest()).WillByDefault(InvokeWithoutArgs([this]() {
                std::this_thread::sleep_for(std::chrono::milliseconds(0));
                return mockInferrequestGPU_0;
            }));
        } else if (deviceName == "GPU.1") {
            if (generateWorkersFail) {
                // Worker generation failure: CreateInferRequest throws on GPU.1.
                mockInferrequestGPU_1 =
                    std::make_shared<mockAsyncInferRequest>(inferReqInternalGPU_1, mockExecutorGPU_1, nullptr, ifThrow);
                ON_CALL(*mockIExeNetGPU_1.get(), CreateInferRequest())
                    .WillByDefault(Throw(InferenceEngine::GeneralError{""}));
            } else {
                mockInferrequestGPU_1 =
                    std::make_shared<mockAsyncInferRequest>(inferReqInternalGPU_1, mockExecutorGPU_1, nullptr, ifThrow);
                ON_CALL(*mockIExeNetGPU_1.get(), CreateInferRequest()).WillByDefault(InvokeWithoutArgs([this]() {
                    std::this_thread::sleep_for(std::chrono::milliseconds(0));
                    return mockInferrequestGPU_1;
                }));
            }
        } else if (deviceName == "VPUX") {
            mockInferrequestVPUX =
                std::make_shared<mockAsyncInferRequest>(inferReqInternalVPUX, mockExecutorVPUX, nullptr, ifThrow);
            ON_CALL(*mockIExeNetVPUX.get(), CreateInferRequest()).WillByDefault(InvokeWithoutArgs([this]() {
                std::this_thread::sleep_for(std::chrono::milliseconds(0));
                return mockInferrequestVPUX;
            }));
        } else {
            // Unknown device in the parameter set: nothing to test.
            return;
        }
    }
    plugin->SetName("AUTO");
    config.insert({InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, targetDev});
    if (!enableRuntimeFallback) {
        config.insert({"ENABLE_RUNTIME_FALLBACK", "NO"});
    }

    EXPECT_CALL(*core,
                LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                            ::testing::Matcher<const std::string&>(_),
                            ::testing::Matcher<const std::map<std::string, std::string>&>(_)))
        .Times(loadNetworkNum);

    std::shared_ptr<InferenceEngine::IExecutableNetworkInternal> exeNetwork;
    std::shared_ptr<IInferRequestInternal> infer_request;

    ASSERT_NO_THROW(exeNetwork = plugin->LoadExeNetworkImpl(cnnNet, config));
    ASSERT_NO_THROW(infer_request = exeNetwork->CreateInferRequest());
    if (expectThrow) {
        EXPECT_THROW(infer_request->Infer(), IE::Exception);
    } else {
        ASSERT_NO_THROW(infer_request->Infer());
    }
}
|
||||
|
||||
const std::vector<ConfigParams> testConfigs = {
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", true}}, 2, true, true, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", false}}, 2, true, false, false, false},
|
||||
ConfigParams{{{"GPU.0", false}, {"GPU.1", true}}, 1, true, false, false, false},
|
||||
ConfigParams{{{"GPU.0", false}, {"GPU.1", false}}, 1, true, false, false, false},
|
||||
//CPU_HELP does not throw
|
||||
ConfigParams{{{"GPU.0", false}, {"CPU", false}}, 2, true, false, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"CPU", false}}, 2, true, false, false, false},
|
||||
//CPU_HELP throw
|
||||
ConfigParams{{{"GPU.0", false}, {"CPU", true}}, 2, true, false, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"CPU", true}}, 2, true, true, false, false},
|
||||
// 3 devices
|
||||
ConfigParams{{{"GPU.0", false}, {"GPU.1", false}, {"VPUX", false}}, 1, true, false, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"VPUX", false}}, 2, true, false, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"VPUX", false}}, 3, true, false, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"VPUX", true}}, 3, true, true, false, false},
|
||||
//CPU_HELP does not throw
|
||||
ConfigParams{{{"GPU.0", false}, {"GPU.1", false}, {"CPU", false}}, 2, true, false, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"CPU", false}}, 2, true, false, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"CPU", false}}, 2, true, false, false, false},
|
||||
//CPU_HELP throw
|
||||
ConfigParams{{{"GPU.0", false}, {"GPU.1", false}, {"CPU", true}}, 2, true, false, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"CPU", true}}, 3, true, false, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"CPU", true}}, 3, true, true, false, false},
|
||||
// disable RumtimeFallback
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", true}}, 1, false, true, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", false}}, 1, false, true, false, false},
|
||||
ConfigParams{{{"GPU.0", false}, {"GPU.1", true}}, 1, false, false, false, false},
|
||||
ConfigParams{{{"GPU.0", false}, {"GPU.1", false}}, 1, false, false, false, false},
|
||||
//CPU_HELP does not throw
|
||||
ConfigParams{{{"GPU.0", false}, {"CPU", false}}, 2, false, false, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"CPU", false}}, 2, false, false, false, false},
|
||||
//CPU_HELP throw
|
||||
ConfigParams{{{"GPU.0", false}, {"CPU", true}}, 2, false, true, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"CPU", true}}, 2, false, true, false, false},
|
||||
// 3 devices
|
||||
ConfigParams{{{"GPU.0", false}, {"GPU.1", false}, {"VPUX", false}}, 1, false, false, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"VPUX", false}}, 1, false, true, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"VPUX", false}}, 1, false, true, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"VPUX", true}}, 1, false, true, false, false},
|
||||
//CPU_HELP does not throw
|
||||
ConfigParams{{{"GPU.0", false}, {"GPU.1", false}, {"CPU", false}}, 2, false, false, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"CPU", false}}, 2, false, false, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"CPU", false}}, 2, false, false, false, false},
|
||||
//CPU_HELP throw
|
||||
ConfigParams{{{"GPU.0", false}, {"GPU.1", false}, {"CPU", true}}, 2, false, true, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"CPU", true}}, 2, false, true, false, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", true}, {"CPU", true}}, 2, false, true, false, false},
|
||||
// loadFail and CreateInferRequestFail
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"VPUX", false}}, 3, true, false, true, false},
|
||||
ConfigParams{{{"GPU.0", true}, {"GPU.1", false}, {"VPUX", false}}, 3, true, false, false, true},
|
||||
};
|
||||
|
||||
// Register the AutoRuntimeFallback suite over the full parameter table above.
INSTANTIATE_TEST_SUITE_P(smoke_AutoRuntimeFallback,
                         AutoRuntimeFallback,
                         ::testing::ValuesIn(testConfigs),
                         AutoRuntimeFallback::getTestCaseName);
|
||||
// ---------------------------------------------------------------------------
// New file: src/tests/unit/auto/plugin/mock_infer_request.hpp (40 lines)
// ---------------------------------------------------------------------------
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
#include <gmock/gmock.h>
|
||||
#include "ie_icore.hpp"
|
||||
#include "plugin.hpp"
|
||||
#include <iostream>
|
||||
|
||||
using namespace MockMultiDevicePlugin;
|
||||
namespace MockMultiDevice {
|
||||
|
||||
// Async infer-request mock whose single pipeline stage either completes as a
// no-op or throws, used by the AUTO runtime-fallback tests to simulate a
// device whose inference fails at runtime.
class mockAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
public:
    using Parent = InferenceEngine::AsyncInferRequestThreadSafeDefault;

    // ifThrow selects the failing (true) or succeeding (false) pipeline.
    mockAsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr& inferRequest,
                          const ImmediateExecutor::Ptr& taskExecutor,
                          const ImmediateExecutor::Ptr& callbackExecutor,
                          bool ifThrow);

    ~mockAsyncInferRequest() override = default;

private:
    bool _throw;  // when true, the pipeline stage throws to emulate a failing device
};
|
||||
|
||||
// Builds a one-stage pipeline that either succeeds (no-op) or throws a generic
// IE exception, emulating runtime inference failure on the mocked device.
// Uses the Parent alias declared in the class instead of repeating the full
// base-class name (consistency with the declaration).
mockAsyncInferRequest::mockAsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr& inferRequest,
                                             const ImmediateExecutor::Ptr& taskExecutor,
                                             const ImmediateExecutor::Ptr& callbackExecutor,
                                             bool ifThrow)
    : Parent(inferRequest, taskExecutor, callbackExecutor),
      _throw(ifThrow) {
    // Replace the default pipeline with a single stage controlled by _throw.
    _pipeline = {};
    _pipeline.push_back({taskExecutor, [this] {
                             if (_throw)
                                 IE_THROW();
                         }});
}
|
||||
} // namespace MockMultiDevice
|
||||
Reference in New Issue
Block a user