Implement CTPUT in AUTO code logic (#16220)
* Implement CTPUT in AUTO code logic * Add logic to handle device loading failure * add some code comments * fix warning conversion from size_t to int * Updated code according to comments of bell and wanglei * the preferred device code path needs to be updated with ctput also * add fallback logic for CTPUT * Modify the code logic according to bell's suggestion * Add prints for debugging bug * throw exception when no device to run pipeline task * initialize idleWorkerRequest for CTPUT * fix getting properties Signed-off-by: fishbell <bell.song@intel.com> refine Signed-off-by: fishbell <bell.song@intel.com> * fix warning Signed-off-by: fishbell <bell.song@intel.com> * fix illegal character on windows Signed-off-by: fishbell <bell.song@intel.com> * fix illegal character Signed-off-by: fishbell <bell.song@intel.com> add missing include Signed-off-by: fishbell <bell.song@intel.com> * more code refine Signed-off-by: fishbell <bell.song@intel.com> --------- Signed-off-by: fishbell <bell.song@intel.com> Co-authored-by: fishbell <bell.song@intel.com>
This commit is contained in:
parent
e66b837104
commit
60ab7490bf
@ -64,9 +64,8 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const {
|
||||
return decltype(ov::device::priorities)::value_type {value->second.as<std::string>()};
|
||||
} else if (name == ov::device::properties) {
|
||||
ov::AnyMap all_devices = {};
|
||||
if (_autoSchedule->_loadContext[ACTUALDEVICE].isAlready) {
|
||||
ov::AnyMap device_properties = {};
|
||||
auto& context = _autoSchedule->_loadContext[ACTUALDEVICE];
|
||||
auto get_device_supported_metrics = [&all_devices] (const AutoLoadContext& context) {
|
||||
ov::AnyMap device_properties = {};
|
||||
auto device_supported_metrics = context.executableNetwork->GetMetric(METRIC_KEY(SUPPORTED_METRICS));
|
||||
for (auto&& property_name : device_supported_metrics.as<std::vector<std::string>>()) {
|
||||
device_properties[property_name] = context.executableNetwork->GetMetric(property_name);
|
||||
@ -76,6 +75,26 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const {
|
||||
device_properties[property_name] = context.executableNetwork->GetConfig(property_name);
|
||||
}
|
||||
all_devices[context.deviceInfo.deviceName] = device_properties;
|
||||
};
|
||||
if (_autoSchedule->_pCTPUTLoadContext) {
|
||||
// need lock for inference failure
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
|
||||
auto load_count = _autoSContext->_devicePriorities.size();
|
||||
for (size_t i = 0; i < load_count; i++)
|
||||
get_device_supported_metrics(_autoSchedule->_pCTPUTLoadContext[i]);
|
||||
} else {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
|
||||
if (_autoSchedule->_loadContext[FALLBACKDEVICE].isAlready) {
|
||||
get_device_supported_metrics(_autoSchedule->_loadContext[FALLBACKDEVICE]);
|
||||
}
|
||||
}
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
|
||||
if (_autoSchedule->_loadContext[ACTUALDEVICE].isAlready) {
|
||||
get_device_supported_metrics(_autoSchedule->_loadContext[ACTUALDEVICE]);
|
||||
} else {
|
||||
get_device_supported_metrics(_autoSchedule->_loadContext[CPU]);
|
||||
}
|
||||
}
|
||||
return all_devices;
|
||||
} else if (name == ov::hint::model_priority) {
|
||||
@ -91,6 +110,24 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const {
|
||||
const unsigned int defaultNumForTPUT = 4u;
|
||||
const unsigned int defaultNumForLatency = 1u;
|
||||
unsigned int real = 0;
|
||||
if (_autoSchedule->_pCTPUTLoadContext) {
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
|
||||
unsigned int res = 0u;
|
||||
auto load_count = _autoSContext->_devicePriorities.size();
|
||||
for (size_t i = 0; i < load_count; i++) {
|
||||
try {
|
||||
res += (_autoSchedule->_pCTPUTLoadContext[i]).executableNetwork->GetMetric(
|
||||
METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
|
||||
} catch (const IE::Exception& iie) {
|
||||
IE_THROW()
|
||||
<< "Every device used in cumulative mode should "
|
||||
<< "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
|
||||
<< "Failed to query the metric for with error:" <<
|
||||
iie.what();
|
||||
}
|
||||
}
|
||||
return decltype(ov::optimal_number_of_infer_requests)::value_type {res};
|
||||
}
|
||||
if (_autoSchedule->_loadContext[ACTUALDEVICE].isAlready) {
|
||||
real = _autoSchedule->_loadContext[ACTUALDEVICE].
|
||||
executableNetwork->GetMetric(name).as<unsigned int>();
|
||||
@ -181,12 +218,13 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const {
|
||||
exeDevices.push_back(ExeDevicesString);
|
||||
execution_devices = decltype(ov::execution_devices)::value_type {exeDevices};
|
||||
};
|
||||
if (_autoSContext->_performanceHint == IE::PluginConfigParams::CUMULATIVE_THROUGHPUT) {
|
||||
try {
|
||||
execution_devices = _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetMetric(name);
|
||||
} catch(const IE::Exception&) {
|
||||
GetExecutionDevices(_autoSchedule->_loadContext[ACTUALDEVICE].workName);
|
||||
if (_autoSchedule->_pCTPUTLoadContext) {
|
||||
std::vector<std::string> exeDevices = {};
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
|
||||
for (auto n : _autoSContext->_devicePriorities) {
|
||||
exeDevices.push_back(n.deviceName);
|
||||
}
|
||||
execution_devices = decltype(ov::execution_devices)::value_type {exeDevices};
|
||||
} else {
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
|
||||
for (int i = 0; i < CONTEXTNUM; i++) {
|
||||
@ -203,9 +241,13 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const {
|
||||
return execution_devices;
|
||||
} else if (name == ov::model_name) {
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
|
||||
if (_autoSchedule->_loadContext[CPU].isEnabled && _autoSchedule->_loadContext[CPU].isAlready)
|
||||
return _autoSchedule->_loadContext[CPU].executableNetwork->GetMetric(name);
|
||||
return _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetMetric(name);
|
||||
if (_autoSchedule->_pCTPUTLoadContext) {
|
||||
return _autoSchedule->_pCTPUTLoadContext[0].executableNetwork->GetMetric(name);
|
||||
} else {
|
||||
if (_autoSchedule->_loadContext[CPU].isEnabled && _autoSchedule->_loadContext[CPU].isAlready)
|
||||
return _autoSchedule->_loadContext[CPU].executableNetwork->GetMetric(name);
|
||||
return _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetMetric(name);
|
||||
}
|
||||
} else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
|
||||
IE_SET_METRIC_RETURN(SUPPORTED_METRICS,
|
||||
{METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS),
|
||||
|
@ -3,7 +3,6 @@
|
||||
//
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "auto_schedule.hpp"
|
||||
#include "async_infer_request.hpp"
|
||||
#include "auto_executable_network.hpp"
|
||||
@ -183,6 +182,27 @@ bool AutoSchedule::selectOtherDevice(const std::string& currentDeviceName) {
|
||||
return getExecutionDevices(_loadContext[FALLBACKDEVICE].deviceInfo.deviceName.c_str());
|
||||
}
|
||||
};
|
||||
|
||||
auto removeInferFailDevice = [&](const std::string& deviceName) {
|
||||
if (_autoSContext->_devicePriorities.size() > 1) {
|
||||
const auto CurrentDeviceIter =
|
||||
std::find_if(_autoSContext->_devicePriorities.begin(),
|
||||
_autoSContext->_devicePriorities.end(),
|
||||
[=](const DeviceInformation& d) -> bool {
|
||||
return d.deviceName.find(deviceName) != std::string::npos;
|
||||
});
|
||||
if (CurrentDeviceIter != _autoSContext->_devicePriorities.end()) {
|
||||
_autoSContext->_devicePriorities.erase(CurrentDeviceIter);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
if (_pCTPUTLoadContext) {
|
||||
return removeInferFailDevice(currentDeviceName);
|
||||
}
|
||||
|
||||
return getExecutionDevices(currentDeviceName);
|
||||
}
|
||||
}
|
||||
@ -217,30 +237,40 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
|
||||
std::list<DeviceInformation> validDevices =
|
||||
_autoSContext->_plugin->GetValidDevice(_autoSContext->_devicePriorities,
|
||||
_loadContext[ACTUALDEVICE].networkPrecision);
|
||||
// When the hint is ctput and there is only one device, the single-device logic is used
|
||||
if (validDevices.size() == 1) {
|
||||
// When the hint is ctput and there is only one device, the single-device logic is used instead of
|
||||
// the MULTI logic
|
||||
// can not change _autoSContext->_performanceHint to THROUGHPUT, because GetMetric needs to return CTPUT
|
||||
_loadContext[ACTUALDEVICE].deviceInfo = validDevices.front();
|
||||
_loadContext[ACTUALDEVICE].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] =
|
||||
IE::PluginConfigParams::THROUGHPUT;
|
||||
isCumulative = false;
|
||||
} else {
|
||||
// When the hint is ctput and there are more than one device, the MULTI logic is used
|
||||
std::string deviceName = "MULTI:";
|
||||
} else if (validDevices.size() > 1) {
|
||||
_loadContext[ACTUALDEVICE].isEnabled = false;
|
||||
_autoSContext->_devicePriorities.clear();
|
||||
std::copy(std::begin(validDevices),
|
||||
std::end(validDevices),
|
||||
std::back_inserter(_autoSContext->_devicePriorities));
|
||||
// Total number of devices in CTPUT
|
||||
auto nCTputDeviceNums = validDevices.size();
|
||||
// Generate contexts for loading each device
|
||||
_pCTPUTLoadContext.reset(new AutoLoadContext[nCTputDeviceNums]);
|
||||
int idx = 0;
|
||||
DeviceInformation cpuDeviceInformation;
|
||||
for (auto& device : validDevices) {
|
||||
deviceName += device.deviceName;
|
||||
deviceName += ((device.deviceName == validDevices.back().deviceName) ? "" : ",");
|
||||
if (device.deviceName.find("CPU") == std::string::npos) {
|
||||
_pCTPUTLoadContext[idx].deviceInfo = device;
|
||||
_pCTPUTLoadContext[idx].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] =
|
||||
IE::PluginConfigParams::THROUGHPUT;
|
||||
idx++;
|
||||
} else {
|
||||
cpuDeviceInformation = device;
|
||||
cpuDeviceInformation.config.insert(
|
||||
{ov::affinity.name(), ov::Any(ov::Affinity::CORE).as<std::string>()});
|
||||
}
|
||||
}
|
||||
if (!cpuDeviceInformation.deviceName.empty()) {
|
||||
_pCTPUTLoadContext[idx].deviceInfo = cpuDeviceInformation;
|
||||
_pCTPUTLoadContext[idx].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] =
|
||||
IE::PluginConfigParams::THROUGHPUT;
|
||||
}
|
||||
_loadContext[ACTUALDEVICE].deviceInfo.deviceName = deviceName;
|
||||
_loadContext[ACTUALDEVICE].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] =
|
||||
InferenceEngine::PluginConfigParams::CUMULATIVE_THROUGHPUT;
|
||||
_loadContext[ACTUALDEVICE].deviceInfo.config[CONFIG_KEY(PERF_COUNT)] =
|
||||
_autoSContext->_needPerfCounters ? InferenceEngine::PluginConfigParams::YES
|
||||
: InferenceEngine::PluginConfigParams::NO;
|
||||
if (_autoSContext->_bindBuffer)
|
||||
_loadContext[ACTUALDEVICE].deviceInfo.config[ov::intel_auto::device_bind_buffer.name()] =
|
||||
InferenceEngine::PluginConfigParams::YES;
|
||||
}
|
||||
} else {
|
||||
_loadContext[ACTUALDEVICE].deviceInfo =
|
||||
@ -248,76 +278,127 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
|
||||
_loadContext[ACTUALDEVICE].networkPrecision,
|
||||
_autoSContext->_modelPriority);
|
||||
}
|
||||
LOG_INFO_TAG("select device:%s", _loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str());
|
||||
bool isActualDevCPU =
|
||||
_loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("CPU") !=std::string::npos && !isCumulative;
|
||||
// if Actual device is CPU or perf_hint is cumulative, disabled _loadContext[CPU], only use _loadContext[ACTUALDEVICE]
|
||||
if (isActualDevCPU || isCumulative || !_autoSContext->_startupfallback) {
|
||||
_loadContext[CPU].isEnabled = false;
|
||||
} else {
|
||||
const auto CPUIter = std::find_if(_autoSContext->_devicePriorities.begin(), _autoSContext->_devicePriorities.end(),
|
||||
[=](const DeviceInformation& d) -> bool { return d.deviceName.find("CPU") != std::string::npos; });
|
||||
// if have CPU Device, enable _loadContext[CPU]
|
||||
if (CPUIter != _autoSContext->_devicePriorities.end()) {
|
||||
_loadContext[CPU].isEnabled = true;
|
||||
_loadContext[CPU].deviceInfo = *CPUIter;
|
||||
_loadContext[CPU].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] = IE::PluginConfigParams::LATENCY;
|
||||
_loadContext[CPU].workName = "CPU_HELP";
|
||||
LOG_INFO_TAG("will load CPU for accelerator");
|
||||
} else {
|
||||
_loadContext[CPU].isEnabled = false;
|
||||
}
|
||||
}
|
||||
// initialize the rest members of load context
|
||||
for (int i = 0; i < CONTEXTNUM; i++) {
|
||||
if (_loadContext[i].isEnabled) {
|
||||
_loadContext[i].future = _loadContext[i].promise.get_future();
|
||||
auto* contextPtr = &_loadContext[i];
|
||||
auto modelPath = _autoSContext->_modelPath;
|
||||
auto network = _autoSContext->_network;
|
||||
_loadContext[i].task = [this, contextPtr, modelPath, network, isCumulative]() mutable {
|
||||
TryToLoadNetWork(*contextPtr, modelPath, network);
|
||||
if (contextPtr->isLoadSuccess) {
|
||||
if (contextPtr->workName.empty()) {
|
||||
contextPtr->workName = contextPtr->deviceInfo.deviceName;
|
||||
}
|
||||
if (!isCumulative)
|
||||
GenerateWorkers(contextPtr->workName, contextPtr->executableNetwork);
|
||||
//need lock
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
|
||||
_autoSContext->_config.insert(contextPtr->deviceInfo.config.begin(), contextPtr->deviceInfo.config.end());
|
||||
}
|
||||
contextPtr->isAlready = true;
|
||||
// reloadsuccess flag only for _loadContext[FALLBACKDEVICE]
|
||||
contextPtr->isReloadSuccess = true;
|
||||
auto& deviceName = contextPtr->deviceInfo.deviceName;
|
||||
LOG_INFO_TAG("device:%s loading Network finished", deviceName.c_str());
|
||||
if (!isCumulative) {
|
||||
auto supported_config_keys =
|
||||
_autoSContext->_core->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS))
|
||||
.as<std::vector<std::string>>();
|
||||
DEBUG_RUN([this, &contextPtr, &deviceName, &supported_config_keys] {
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
|
||||
for (const auto& cfg : supported_config_keys) {
|
||||
try {
|
||||
LOG_DEBUG_TAG(
|
||||
"device:%s, GetConfig:%s=%s",
|
||||
deviceName.c_str(),
|
||||
cfg.c_str(),
|
||||
contextPtr->executableNetwork->GetConfig(cfg).as<std::string>().c_str());
|
||||
} catch (const IE::Exception&) {
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
auto loadDeviceTask = [&](AutoLoadContext* contextPtr,
|
||||
const std::string& modelPath,
|
||||
const IE::CNNNetwork& network,
|
||||
bool isCumulative) {
|
||||
TryToLoadNetWork(*contextPtr, modelPath, network, isCumulative);
|
||||
if (contextPtr->isLoadSuccess) {
|
||||
if (contextPtr->workName.empty()) {
|
||||
contextPtr->workName = contextPtr->deviceInfo.deviceName;
|
||||
}
|
||||
GenerateWorkers(contextPtr->workName, contextPtr->executableNetwork);
|
||||
// need lock
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
|
||||
_autoSContext->_config.insert(contextPtr->deviceInfo.config.begin(),
|
||||
contextPtr->deviceInfo.config.end());
|
||||
}
|
||||
contextPtr->isAlready = true;
|
||||
// reloadsuccess flag only for _loadContext[FALLBACKDEVICE]
|
||||
contextPtr->isReloadSuccess = true;
|
||||
auto& deviceName = contextPtr->deviceInfo.deviceName;
|
||||
LOG_INFO_TAG("device:%s loading Network finished", deviceName.c_str());
|
||||
auto supported_config_keys = _autoSContext->_core->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS))
|
||||
.as<std::vector<std::string>>();
|
||||
DEBUG_RUN([this, &contextPtr, &deviceName, &supported_config_keys] {
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
|
||||
for (const auto& cfg : supported_config_keys) {
|
||||
try {
|
||||
LOG_DEBUG_TAG("device:%s, GetConfig:%s=%s",
|
||||
deviceName.c_str(),
|
||||
cfg.c_str(),
|
||||
contextPtr->executableNetwork->GetConfig(cfg).as<std::string>().c_str());
|
||||
} catch (const IE::Exception&) {
|
||||
}
|
||||
}
|
||||
contextPtr->promise.set_value();
|
||||
// the first load network process finished
|
||||
std::call_once(_firstLoadOC, [this]() {
|
||||
_firstLoadPromise.set_value();
|
||||
});
|
||||
};
|
||||
});
|
||||
}
|
||||
// Handle device load failure in case of ctput
|
||||
if (isCumulative && !contextPtr->isLoadSuccess) {
|
||||
std::string failedDeviceName = contextPtr->deviceInfo.deviceName;
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
|
||||
const auto DeviceIter =
|
||||
std::find_if(_autoSContext->_devicePriorities.begin(),
|
||||
_autoSContext->_devicePriorities.end(),
|
||||
[&](const DeviceInformation& d) -> bool {
|
||||
return d.deviceName.find(failedDeviceName) != std::string::npos;
|
||||
});
|
||||
// Remove failed device from _devicePriorities
|
||||
if (DeviceIter != _autoSContext->_devicePriorities.end()) {
|
||||
_autoSContext->_devicePriorities.erase(DeviceIter);
|
||||
}
|
||||
// Remove failed device from ov::device::priorities in config
|
||||
auto it_prior = _autoSContext->_config.find(ov::device::priorities.name());
|
||||
if (it_prior != _autoSContext->_config.end()) {
|
||||
auto priorities = it_prior->second.as<std::string>();
|
||||
size_t nPos = priorities.find(failedDeviceName);
|
||||
if (nPos != std::string::npos) {
|
||||
// If need to delete failed device and "," then length plus 1
|
||||
size_t nNameLen = (nPos + failedDeviceName.length()) == priorities.length()
|
||||
? failedDeviceName.length()
|
||||
: failedDeviceName.length() + 1;
|
||||
priorities.erase(nPos, nNameLen);
|
||||
it_prior->second = priorities;
|
||||
}
|
||||
}
|
||||
}
|
||||
contextPtr->promise.set_value();
|
||||
// the first load network process finished
|
||||
std::call_once(_firstLoadOC, [this]() {
|
||||
_firstLoadPromise.set_value();
|
||||
});
|
||||
};
|
||||
if (_loadContext[ACTUALDEVICE].isEnabled) {
|
||||
LOG_INFO_TAG("select device:%s", _loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str());
|
||||
bool isActualDevCPU = _loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("CPU") != std::string::npos;
|
||||
// if Actual device is CPU or perf_hint is cumulative, disabled _loadContext[CPU], only use
|
||||
// _loadContext[ACTUALDEVICE]
|
||||
if (isActualDevCPU || !_autoSContext->_startupfallback) {
|
||||
_loadContext[CPU].isEnabled = false;
|
||||
} else {
|
||||
const auto CPUIter = std::find_if(_autoSContext->_devicePriorities.begin(),
|
||||
_autoSContext->_devicePriorities.end(),
|
||||
[](const DeviceInformation& d) -> bool {
|
||||
return d.deviceName.find("CPU") != std::string::npos;
|
||||
});
|
||||
// if have CPU Device, enable _loadContext[CPU]
|
||||
if (CPUIter != _autoSContext->_devicePriorities.end()) {
|
||||
_loadContext[CPU].isEnabled = true;
|
||||
_loadContext[CPU].deviceInfo = *CPUIter;
|
||||
_loadContext[CPU].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] = IE::PluginConfigParams::LATENCY;
|
||||
_loadContext[CPU].workName = "CPU_HELP";
|
||||
LOG_INFO_TAG("will load CPU for accelerator");
|
||||
} else {
|
||||
_loadContext[CPU].isEnabled = false;
|
||||
}
|
||||
}
|
||||
// initialize the rest members of load context
|
||||
for (int i = 0; i < CONTEXTNUM; i++) {
|
||||
if (_loadContext[i].isEnabled) {
|
||||
_loadContext[i].future = _loadContext[i].promise.get_future();
|
||||
auto* contextPtr = &_loadContext[i];
|
||||
auto modelPath = _autoSContext->_modelPath;
|
||||
auto network = _autoSContext->_network;
|
||||
_loadContext[i].task = std::bind(loadDeviceTask, contextPtr, modelPath, network, isCumulative);
|
||||
}
|
||||
}
|
||||
}
|
||||
std::vector<Task> otherDevicesloads;
|
||||
std::vector<Task> cpuLoads;
|
||||
if (_pCTPUTLoadContext) {
|
||||
for (size_t i = 0; i < _autoSContext->_devicePriorities.size(); i++) {
|
||||
auto* contextPtr = &_pCTPUTLoadContext[i];
|
||||
auto modelPath = _autoSContext->_modelPath;
|
||||
auto network = _autoSContext->_network;
|
||||
_pCTPUTLoadContext[i].task = std::bind(loadDeviceTask, contextPtr, modelPath, network, isCumulative);
|
||||
if (i == _autoSContext->_devicePriorities.size() - 1 &&
|
||||
_pCTPUTLoadContext[i].deviceInfo.deviceName.find("CPU") != std::string::npos) {
|
||||
cpuLoads.push_back(_pCTPUTLoadContext[i].task);
|
||||
} else {
|
||||
otherDevicesloads.push_back(_pCTPUTLoadContext[i].task);
|
||||
}
|
||||
}
|
||||
}
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin,
|
||||
@ -350,13 +431,11 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
|
||||
_loadContext[CPU].future.wait();
|
||||
// clean up helper infer requests
|
||||
// first, wait for all the remaining requests to finish
|
||||
if (!_autoSContext->_runtimeFallback) {
|
||||
for (auto& iter : _workerRequests["CPU_HELP"]) {
|
||||
try {
|
||||
iter._inferRequest._ptr->Wait(IE::InferRequest::WaitMode::RESULT_READY);
|
||||
} catch (const IE::Exception& iie) {
|
||||
LOG_DEBUG_TAG("No infer results expected, infer in CPU_HELP throw some errors: %s", iie.what());
|
||||
}
|
||||
for (auto& iter : _workerRequests["CPU_HELP"]) {
|
||||
try {
|
||||
iter._inferRequest._ptr->Wait(IE::InferRequest::WaitMode::RESULT_READY);
|
||||
} catch (const IE::Exception& iie) {
|
||||
LOG_DEBUG_TAG("No infer results expected, infer in CPU_HELP throw some errors: %s", iie.what());
|
||||
}
|
||||
}
|
||||
// late enough to check the idle queue now
|
||||
@ -411,14 +490,38 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
|
||||
}
|
||||
_loadContext[ACTUALDEVICE].task();
|
||||
} else {
|
||||
// only one device need to load network, do not need to load it async
|
||||
_loadContext[ACTUALDEVICE].task();
|
||||
_passthroughExeNet = _loadContext[ACTUALDEVICE].executableNetwork;
|
||||
if (_pCTPUTLoadContext) {
|
||||
for (auto&& device : _autoSContext->_devicePriorities) {
|
||||
// initialize containers before run async task, if not initialized, it will hang during infer
|
||||
_idleWorkerRequests[device.deviceName];
|
||||
_workerRequests[device.deviceName];
|
||||
_inferPipelineTasksDeviceSpecific[device.deviceName] = nullptr;
|
||||
}
|
||||
_executor = _autoSContext->_plugin->executorManager()->getIdleCPUStreamsExecutor(IStreamsExecutor::Config{
|
||||
"CTPUTDeviceAsyncLoad",
|
||||
static_cast<int>(std::thread::hardware_concurrency()) /* max possible #streams*/,
|
||||
0 /*default threads per stream, workaround for ticket 62376*/,
|
||||
IStreamsExecutor::ThreadBindingType::NONE});
|
||||
// load devices other than CPU first
|
||||
if (otherDevicesloads.size() > 0) {
|
||||
// Wait for the devices other than CPU to load the network
|
||||
_executor->runAndWait(otherDevicesloads);
|
||||
}
|
||||
// Finally load the CPU
|
||||
if (cpuLoads.size() > 0) {
|
||||
// Wait for CPU to load the network
|
||||
_executor->runAndWait(cpuLoads);
|
||||
}
|
||||
} else {
|
||||
// only one device need to load network, do not need to load it async
|
||||
_loadContext[ACTUALDEVICE].task();
|
||||
_passthroughExeNet = _loadContext[ACTUALDEVICE].executableNetwork;
|
||||
}
|
||||
}
|
||||
WaitFirstNetworkReady();
|
||||
}
|
||||
|
||||
void AutoSchedule::TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network) {
|
||||
void AutoSchedule::TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network, bool isCumulative) {
|
||||
auto& device = context.deviceInfo.deviceName;
|
||||
auto& deviceConfig = context.deviceInfo.config;
|
||||
auto& deviceList = context.metaDevices;
|
||||
@ -458,7 +561,7 @@ void AutoSchedule::TryToLoadNetWork(AutoLoadContext& context, const std::string&
|
||||
context.errMessage += device + ":" + e.what();
|
||||
context.isLoadSuccess = false;
|
||||
}
|
||||
if (context.isLoadSuccess || curDevIsCPU) {
|
||||
if (context.isLoadSuccess || curDevIsCPU || isCumulative) {
|
||||
return;
|
||||
}
|
||||
// need to reload network, unregister it's priority
|
||||
@ -512,7 +615,7 @@ void AutoSchedule::TryToLoadNetWork(AutoLoadContext& context, const std::string&
|
||||
}
|
||||
LOG_DEBUG_TAG("try to load %s", context.deviceInfo.deviceName.c_str());
|
||||
// try to load this candidate device
|
||||
TryToLoadNetWork(context, modelPath, network);
|
||||
TryToLoadNetWork(context, modelPath, network, isCumulative);
|
||||
}
|
||||
|
||||
void AutoSchedule::WaitFirstNetworkReady() {
|
||||
@ -542,6 +645,20 @@ void AutoSchedule::WaitFirstNetworkReady() {
|
||||
LOG_ERROR_TAG("load failed, %s", _loadContext[i].errMessage.c_str());
|
||||
}
|
||||
}
|
||||
// devices loaded successfully in CTPUT
|
||||
if (_pCTPUTLoadContext) {
|
||||
int nLoadSucNums = 0;
|
||||
for (size_t i = 0; i < _autoSContext->_devicePriorities.size(); i++) {
|
||||
// check if device loaded successfully
|
||||
if (_pCTPUTLoadContext[i].isAlready) {
|
||||
nLoadSucNums++;
|
||||
}
|
||||
}
|
||||
// one or more devices loaded successfully
|
||||
if (nLoadSucNums > 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
IE_THROW() << GetLogTag() << "load all devices failed";
|
||||
}
|
||||
|
||||
@ -560,29 +677,45 @@ bool AutoSchedule::ScheduleToWorkerInferRequest(IE::Task inferPipelineTask, Devi
|
||||
std::vector<DeviceInformation> devices;
|
||||
// AUTO work mode
|
||||
if (!preferred_device.empty()) {
|
||||
// if the device needed by customer is not ready, need to wait for it
|
||||
WaitActualNetworkReady();
|
||||
// the preferred_device should be the selected device in AUTO work mode
|
||||
if (preferred_device != _loadContext[ACTUALDEVICE].deviceInfo.deviceName) {
|
||||
IE_THROW(NotFound) << "The preferred device should be the selected device";
|
||||
}
|
||||
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
|
||||
} else {
|
||||
// _acceleratorDevice could be the same as _cpuDevice, such as AUTO:CPU
|
||||
if (_loadContext[FALLBACKDEVICE].isAlready) {
|
||||
devices.push_back(_loadContext[FALLBACKDEVICE].deviceInfo);
|
||||
if (_pCTPUTLoadContext) {
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
|
||||
devices = _autoSContext->_devicePriorities;
|
||||
} else {
|
||||
if (_loadContext[ACTUALDEVICE].isAlready) {
|
||||
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
|
||||
// if the device needed by customer is not ready, need to wait for it
|
||||
WaitActualNetworkReady();
|
||||
// the preferred_device should be the selected device in AUTO work mode
|
||||
if (preferred_device != _loadContext[ACTUALDEVICE].deviceInfo.deviceName) {
|
||||
IE_THROW(NotFound) << "The preferred device should be the selected device";
|
||||
}
|
||||
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
|
||||
}
|
||||
} else {
|
||||
if (_pCTPUTLoadContext) {
|
||||
// Devices that fail infer will be removed from the priority list in the callback, need lock here
|
||||
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
|
||||
for (size_t i = 0; i < _autoSContext->_devicePriorities.size(); i++) {
|
||||
devices.push_back(_autoSContext->_devicePriorities[i]);
|
||||
}
|
||||
} else {
|
||||
// _acceleratorDevice could be the same as _cpuDevice, such as AUTO:CPU
|
||||
if (_loadContext[FALLBACKDEVICE].isAlready) {
|
||||
devices.push_back(_loadContext[FALLBACKDEVICE].deviceInfo);
|
||||
} else {
|
||||
// replace deviceName with workName, so schedule can select correct
|
||||
// idleWorkerQueue
|
||||
auto deviceInfo = _loadContext[CPU].deviceInfo;
|
||||
deviceInfo.deviceName = _loadContext[CPU].workName;
|
||||
devices.push_back(std::move(deviceInfo));
|
||||
if (_loadContext[ACTUALDEVICE].isAlready) {
|
||||
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
|
||||
} else {
|
||||
// replace deviceName with workName, so schedule can select correct
|
||||
// idleWorkerQueue
|
||||
auto deviceInfo = _loadContext[CPU].deviceInfo;
|
||||
deviceInfo.deviceName = _loadContext[CPU].workName;
|
||||
devices.push_back(std::move(deviceInfo));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (devices.size() == 0) {
|
||||
IE_THROW(GeneralError) << "No device to run pipeline task";
|
||||
}
|
||||
for (auto&& device : devices) {
|
||||
if (!preferred_device.empty() && (device.deviceName != preferred_device)) {
|
||||
continue;
|
||||
@ -644,27 +777,12 @@ IInferPtr AutoSchedule::CreateInferRequest() {
|
||||
if (!syncRequestImpl)
|
||||
syncRequestImpl = CreateInferRequestImpl(execNetwork->_networkInputs, execNetwork->_networkOutputs);
|
||||
syncRequestImpl->setPointerToExecutableNetworkInternal(execNetwork);
|
||||
bool isCumulative = (_autoSContext->_performanceHint == IE::PluginConfigParams::CUMULATIVE_THROUGHPUT) ? true : false;
|
||||
bool isCTPUTSingleDevice =
|
||||
isCumulative && _loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("MULTI:") == std::string::npos ? true
|
||||
: false;
|
||||
if ((_passthroughExeNet && !isCumulative) || isCTPUTSingleDevice) {
|
||||
std::string perfmode;
|
||||
try {
|
||||
perfmode = _passthroughExeNet->GetConfig(
|
||||
CONFIG_KEY(PERFORMANCE_HINT)).as<std::string>();
|
||||
} catch (const IE::Exception&) {
|
||||
LOG_INFO("query perf hint from passthrough network failed");
|
||||
}
|
||||
if (_autoSContext->_batchingDisabled || perfmode != CONFIG_VALUE(THROUGHPUT)) {
|
||||
syncRequestImpl->setPointerToSo(_passthroughExeNet._so);
|
||||
} else {
|
||||
auto so = _passthroughExeNet._ptr->GetPointerToSo();
|
||||
// Get the _so from passthrough executable network when batch plugin is disable.
|
||||
if (!so)
|
||||
so = _passthroughExeNet._so;
|
||||
syncRequestImpl->setPointerToSo(so);
|
||||
}
|
||||
if (_passthroughExeNet) {
|
||||
auto so = _passthroughExeNet._ptr->GetPointerToSo();
|
||||
// Get the _so from passthrough executable network when batch plugin is disable.
|
||||
if (!so)
|
||||
so = _passthroughExeNet._so;
|
||||
syncRequestImpl->setPointerToSo(so);
|
||||
} else if (std::static_pointer_cast<MultiDeviceInferRequest>(syncRequestImpl)->GetSharedRequest()) {
|
||||
// cumulative case, load to MULTI:*
|
||||
auto sharedMultiRequest = std::static_pointer_cast<MultiDeviceInferRequest>(syncRequestImpl)->GetSharedRequest();
|
||||
|
@ -50,6 +50,7 @@ public:
|
||||
|
||||
public:
|
||||
AutoLoadContext _loadContext[CONTEXTNUM];
|
||||
std::unique_ptr<AutoLoadContext[]> _pCTPUTLoadContext = nullptr;
|
||||
|
||||
protected:
|
||||
void GenerateWorkers(const std::string& device, const SoExecNetwork& executableNetwork) override;
|
||||
@ -60,7 +61,7 @@ protected:
|
||||
|
||||
private:
|
||||
void WaitFirstNetworkReady();
|
||||
void TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network);
|
||||
void TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network, bool isCumulative);
|
||||
bool selectOtherDevice(const std::string& currentDeviceName);
|
||||
IE::Task releaseActualdeviceTask;
|
||||
|
||||
|
@ -307,26 +307,15 @@ IInferPtr MultiSchedule::CreateInferRequest() {
|
||||
syncRequestImpl = CreateInferRequestImpl(execNetwork->_networkInputs, execNetwork->_networkOutputs);
|
||||
syncRequestImpl->setPointerToExecutableNetworkInternal(execNetwork);
|
||||
if (_passthroughExeNet) {
|
||||
std::string perfmode;
|
||||
try {
|
||||
perfmode = _passthroughExeNet->GetConfig(
|
||||
CONFIG_KEY(PERFORMANCE_HINT)).as<std::string>();
|
||||
} catch (const IE::Exception&) {
|
||||
LOG_INFO("query perf hint from passthrough network failed");
|
||||
}
|
||||
if (_multiSContext->_batchingDisabled || perfmode != CONFIG_VALUE(THROUGHPUT)) {
|
||||
syncRequestImpl->setPointerToSo(_passthroughExeNet._so);
|
||||
} else {
|
||||
auto so = _passthroughExeNet._ptr->GetPointerToSo();
|
||||
// Get the _so from passthrough executable network when batch plugin is disable.
|
||||
if (!so)
|
||||
so = _passthroughExeNet._so;
|
||||
syncRequestImpl->setPointerToSo(so);
|
||||
}
|
||||
auto so = _passthroughExeNet._ptr->GetPointerToSo();
|
||||
// Get the _so from passthrough executable network when batch plugin is disabled.
|
||||
if (!so)
|
||||
so = _passthroughExeNet._so;
|
||||
syncRequestImpl->setPointerToSo(so);
|
||||
} else if (_multiSContext->_bindBuffer) {
|
||||
auto sharedRequest = std::static_pointer_cast<MultiDeviceInferRequest>(syncRequestImpl)->GetSharedRequest();
|
||||
if (sharedRequest._ptr->getPointerToSo())
|
||||
syncRequestImpl->setPointerToSo(sharedRequest._ptr->getPointerToSo());
|
||||
syncRequestImpl->setPointerToSo(sharedRequest._ptr->getPointerToSo());
|
||||
else
|
||||
syncRequestImpl->setPointerToSo(sharedRequest._so);
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <thread>
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
#include "ngraph/function.hpp"
|
||||
#include "ngraph_functions/subgraph_builders.hpp"
|
||||
@ -90,9 +90,11 @@ protected:
|
||||
} else {
|
||||
m_extList.push_back(ext);
|
||||
}
|
||||
std::replace(test_name.begin(), test_name.end(), '/', '_');
|
||||
std::replace(test_name.begin(), test_name.end(), '\\', '_');
|
||||
cache_path = "LoadNetwork" + test_name + "_cache";
|
||||
auto hash = std::hash<std::string>()(test_name);
|
||||
std::stringstream ss;
|
||||
ss << std::this_thread::get_id();
|
||||
cache_path = "LoadNetwork" + std::to_string(hash) + "_"
|
||||
+ ss.str() + "_" + GetTimestamp() + "_cache";
|
||||
}
|
||||
void TearDown() override {
|
||||
APIBaseTest::TearDown();
|
||||
|
@ -510,14 +510,16 @@ void CompiledKernelsCacheTest::SetUp() {
|
||||
std::string ext = userConfig.second;
|
||||
std::string::size_type pos = 0;
|
||||
if ((pos = ext.find(",", pos)) != std::string::npos) {
|
||||
m_extList.push_back(ext.substr(0, pos));
|
||||
m_extList.push_back(ext.substr(pos + 1));
|
||||
} else {
|
||||
m_extList.push_back(ext);
|
||||
}
|
||||
std::replace(test_name.begin(), test_name.end(), '/', '_');
|
||||
std::replace(test_name.begin(), test_name.end(), '\\', '_');
|
||||
cache_path = "compiledModel" + test_name + "_cache";
|
||||
m_extList.push_back(ext.substr(0, pos));
|
||||
m_extList.push_back(ext.substr(pos + 1));
|
||||
} else {
|
||||
m_extList.push_back(ext);
|
||||
}
|
||||
auto hash = std::hash<std::string>()(test_name);
|
||||
std::stringstream ss;
|
||||
ss << std::this_thread::get_id();
|
||||
cache_path = "compiledModel" + std::to_string(hash) + "_"
|
||||
+ ss.str() + "_" + GetTimestamp() + "_cache";
|
||||
}
|
||||
|
||||
void CompiledKernelsCacheTest::TearDown() {
|
||||
|
@ -200,12 +200,6 @@ TEST_P(LoadNetworkWithCTPUTMockTest, CTPUTSingleDevLogicTest) {
|
||||
::testing::Matcher<const std::map<std::string, std::string>&>(
|
||||
ComparePerfHint(InferenceEngine::PluginConfigParams::THROUGHPUT))))
|
||||
.Times(1);
|
||||
// no MULTI logic to be called
|
||||
EXPECT_CALL(*core,
|
||||
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
|
||||
::testing::Matcher<const std::string&>("MULTI:" + targetDevice),
|
||||
::testing::Matcher<const std::map<std::string, std::string>&>(_)))
|
||||
.Times(0);
|
||||
// if target device only has GPU, no CPU helper to be called
|
||||
if (targetDevice.find("GPU") != std::string::npos) {
|
||||
EXPECT_CALL(*core,
|
||||
@ -220,14 +214,14 @@ TEST_P(LoadNetworkWithCTPUTMockTest, CTPUTSingleDevLogicTest) {
|
||||
for (auto& deviceName : targetDevices) {
|
||||
targetDev += deviceName;
|
||||
targetDev += ((deviceName == targetDevices.back()) ? "" : ",");
|
||||
EXPECT_CALL(*core,
|
||||
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
|
||||
::testing::Matcher<const std::string&>(deviceName),
|
||||
::testing::Matcher<const std::map<std::string, std::string>&>(
|
||||
ComparePerfHint(InferenceEngine::PluginConfigParams::THROUGHPUT))))
|
||||
.Times(1);
|
||||
}
|
||||
config.insert({InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, targetDev});
|
||||
// Call MULTI logic
|
||||
EXPECT_CALL(*core,
|
||||
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
|
||||
::testing::Matcher<const std::string&>("MULTI:" + targetDev),
|
||||
::testing::Matcher<const std::map<std::string, std::string>&>(_)))
|
||||
.Times(1);
|
||||
// no CPU helper to be called
|
||||
EXPECT_CALL(*core,
|
||||
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
|
||||
|
@ -230,6 +230,8 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// Separate suite name for the cumulative-throughput (CTPUT) runtime-fallback tests.
// This is a plain type alias, so those tests run on the AutoRuntimeFallback fixture itself.
using AutoCTPUTRuntimeFallback = AutoRuntimeFallback;
|
||||
|
||||
TEST_P(AutoRuntimeFallback, releaseResource) {
|
||||
std::string targetDev;
|
||||
std::vector<std::tuple<std::string, bool>> targetDevices;
|
||||
@ -362,3 +364,93 @@ const std::vector<ConfigParams> testConfigs = {
|
||||
// Register the AutoRuntimeFallback suite: one test instance per entry of testConfigs,
// named by the fixture's getTestCaseName.
INSTANTIATE_TEST_SUITE_P(
    smoke_AutoRuntimeFallback,
    AutoRuntimeFallback,
    ::testing::ValuesIn(testConfigs),
    AutoRuntimeFallback::getTestCaseName);
|
||||
|
||||
// Loads a network through the AUTO plugin with the CUMULATIVE_THROUGHPUT performance hint
// and checks whether a synchronous Infer() throws, given which mocked per-device infer
// requests are set up to fail.
//
// Test parameters (in tuple order):
//   - device list: (device name, will infer throw exception) pairs
//   - expected number of core->LoadNetwork calls
//   - whether runtime fallback stays enabled
//   - whether Infer() is expected to throw
//   - whether LoadNetwork for "GPU.1" should throw
//   - whether CreateInferRequest on the GPU.1 executable network should throw
TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) {
    std::vector<std::tuple<std::string, bool>> deviceList;
    int expectedLoadCalls;
    bool runtimeFallbackEnabled;
    bool inferMustThrow;
    bool failLoadOnGpu1;
    bool failWorkersOnGpu1;
    std::tie(deviceList,
             expectedLoadCalls,
             runtimeFallbackEnabled,
             inferMustThrow,
             failLoadOnGpu1,
             failWorkersOnGpu1) = this->GetParam();

    if (failLoadOnGpu1) {
        // Make loading the network on GPU.1 fail.
        ON_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                                   ::testing::Matcher<const std::string&>(StrEq("GPU.1")),
                                   ::testing::Matcher<const Config&>(_)))
            .WillByDefault(Throw(InferenceEngine::GeneralError{""}));
    }

    // Build the comma-separated priority string and wire one mock infer request per device.
    std::string priorities;
    for (const auto& entry : deviceList) {
        const std::string& name = std::get<0>(entry);
        const bool throwOnInfer = std::get<1>(entry);

        priorities += name;
        // No trailing comma after the entry that compares equal to the last one.
        if (entry != deviceList.back()) {
            priorities += ",";
        }

        if (name == "CPU") {
            mockInferrequest =
                std::make_shared<mockAsyncInferRequest>(inferReqInternal, mockExecutor, nullptr, throwOnInfer);
            ON_CALL(*mockIExeNet.get(), CreateInferRequest()).WillByDefault(Return(mockInferrequest));
            continue;
        }

        if (name == "GPU.0") {
            mockInferrequestGPU_0 =
                std::make_shared<mockAsyncInferRequest>(inferReqInternalGPU_0, mockExecutorGPU_0, nullptr, throwOnInfer);
            ON_CALL(*mockIExeNetGPU_0.get(), CreateInferRequest()).WillByDefault(InvokeWithoutArgs([this]() {
                std::this_thread::sleep_for(std::chrono::milliseconds(0));
                return mockInferrequestGPU_0;
            }));
            continue;
        }

        if (name == "GPU.1") {
            // The mock request is created either way; only the CreateInferRequest behaviour differs.
            mockInferrequestGPU_1 =
                std::make_shared<mockAsyncInferRequest>(inferReqInternalGPU_1, mockExecutorGPU_1, nullptr, throwOnInfer);
            if (failWorkersOnGpu1) {
                ON_CALL(*mockIExeNetGPU_1.get(), CreateInferRequest())
                    .WillByDefault(Throw(InferenceEngine::GeneralError{""}));
            } else {
                ON_CALL(*mockIExeNetGPU_1.get(), CreateInferRequest()).WillByDefault(InvokeWithoutArgs([this]() {
                    std::this_thread::sleep_for(std::chrono::milliseconds(0));
                    return mockInferrequestGPU_1;
                }));
            }
            continue;
        }

        // Any other device name has no mock set up here: end the test body early.
        return;
    }

    plugin->SetName("AUTO");
    config.insert({InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, priorities});
    config.insert({InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT,
                   InferenceEngine::PluginConfigParams::CUMULATIVE_THROUGHPUT});
    if (!runtimeFallbackEnabled) {
        config.insert({{"ENABLE_RUNTIME_FALLBACK", "NO"}});
    }

    // Count every LoadNetwork call on the core, regardless of the target device.
    EXPECT_CALL(*core,
                LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                            ::testing::Matcher<const std::string&>(_),
                            ::testing::Matcher<const std::map<std::string, std::string>&>(_)))
        .Times(expectedLoadCalls);

    std::shared_ptr<InferenceEngine::IExecutableNetworkInternal> exeNetwork;
    std::shared_ptr<IInferRequestInternal> infer_request;

    // Loading and request creation must succeed; only Infer() itself may throw.
    ASSERT_NO_THROW(exeNetwork = plugin->LoadExeNetworkImpl(cnnNet, config));
    ASSERT_NO_THROW(infer_request = exeNetwork->CreateInferRequest());
    if (inferMustThrow) {
        EXPECT_THROW(infer_request->Infer(), IE::Exception);
    } else {
        ASSERT_NO_THROW(infer_request->Infer());
    }
}
|
||||
|
||||
// ConfigParams: targetDevices(deviceName, will infer throw exception), loadNetworkNum, enableRumtimeFallback,
|
||||
// expectThrow, loadNetworkFail, generateWorkersFail
|
||||
const std::vector<ConfigParams> testCtputConfigs = {
|
||||
ConfigParams{{{"CPU", false}, {"GPU.0", true}, {"GPU.1", true}}, 3, true, false, false, false},
|
||||
ConfigParams{{{"CPU", true}, {"GPU.0", false}, {"GPU.1", true}}, 3, true, false, false, false},
|
||||
ConfigParams{{{"CPU", true}, {"GPU.0", true}, {"GPU.1", true}}, 3, true, true, false, false},
|
||||
// disable RumtimeFallback
|
||||
ConfigParams{{{"CPU", false}, {"GPU.0", false}, {"GPU.1", false}}, 3, false, false, false, false},
|
||||
ConfigParams{{{"CPU", true}, {"GPU.0", false}, {"GPU.1", false}}, 3, false, true, false, false},
|
||||
};
|
||||
|
||||
// Register the CTPUT runtime-fallback suite: one test instance per entry of
// testCtputConfigs, named by the fixture's getTestCaseName.
INSTANTIATE_TEST_SUITE_P(
    smoke_AutoCTPUTRuntimeFallback,
    AutoCTPUTRuntimeFallback,
    ::testing::ValuesIn(testCtputConfigs),
    AutoCTPUTRuntimeFallback::getTestCaseName);
|
||||
|
Loading…
Reference in New Issue
Block a user