Implement CTPUT in AUTO code logic (#16220)

* Implement CTPUT in AUTO code logic

* Add logic to handle device loading failure

* add some code comments

* fix warning: conversion from size_t to int

* Updated code according to comments from bell and wanglei

* the preferred device code path needs to be updated for CTPUT as well

* add fallback logic for CTPUT

* Modify the code logic according to bell's suggestion

* Add prints for debugging

* throw exception when no device is available to run the pipeline task

* initialize idleWorkerRequest for CTPUT

* fix getting properties

Signed-off-by: fishbell <bell.song@intel.com>

refine

Signed-off-by: fishbell <bell.song@intel.com>

* fix warning

Signed-off-by: fishbell <bell.song@intel.com>

* fix illegal character on Windows

Signed-off-by: fishbell <bell.song@intel.com>

* fix illegal character

Signed-off-by: fishbell <bell.song@intel.com>

add missing include

Signed-off-by: fishbell <bell.song@intel.com>

* more code refine

Signed-off-by: fishbell <bell.song@intel.com>

---------

Signed-off-by: fishbell <bell.song@intel.com>
Co-authored-by: fishbell <bell.song@intel.com>
guozhong wang 2023-03-26 12:35:26 +08:00 committed by GitHub
parent e66b837104
commit 60ab7490bf
8 changed files with 432 additions and 192 deletions
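
For readers unfamiliar with the hint: CUMULATIVE_THROUGHPUT (CTPUT) makes the AUTO plugin load and run the network on all selected devices at once, rather than picking a single best device. A minimal usage sketch with the OpenVINO 2.0 C++ API (the model path and device list below are placeholders):

    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        // "model.xml" is a placeholder path; "AUTO:GPU,CPU" restricts the candidate devices
        auto model = core.read_model("model.xml");
        auto compiled = core.compile_model(
            model,
            "AUTO:GPU,CPU",
            ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT));
        // requests are scheduled across all devices the model was loaded on
        auto request = compiled.create_infer_request();
        return 0;
    }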

View File

@@ -64,9 +64,8 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const {
return decltype(ov::device::priorities)::value_type {value->second.as<std::string>()};
} else if (name == ov::device::properties) {
ov::AnyMap all_devices = {};
if (_autoSchedule->_loadContext[ACTUALDEVICE].isAlready) {
auto get_device_supported_metrics = [&all_devices] (const AutoLoadContext& context) {
ov::AnyMap device_properties = {};
auto& context = _autoSchedule->_loadContext[ACTUALDEVICE];
auto device_supported_metrics = context.executableNetwork->GetMetric(METRIC_KEY(SUPPORTED_METRICS));
for (auto&& property_name : device_supported_metrics.as<std::vector<std::string>>()) {
device_properties[property_name] = context.executableNetwork->GetMetric(property_name);
@@ -76,6 +75,26 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const {
device_properties[property_name] = context.executableNetwork->GetConfig(property_name);
}
all_devices[context.deviceInfo.deviceName] = device_properties;
};
if (_autoSchedule->_pCTPUTLoadContext) {
// lock needed: _devicePriorities may change when a device fails during inference
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
auto load_count = _autoSContext->_devicePriorities.size();
for (size_t i = 0; i < load_count; i++)
get_device_supported_metrics(_autoSchedule->_pCTPUTLoadContext[i]);
} else {
{
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
if (_autoSchedule->_loadContext[FALLBACKDEVICE].isAlready) {
get_device_supported_metrics(_autoSchedule->_loadContext[FALLBACKDEVICE]);
}
}
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
if (_autoSchedule->_loadContext[ACTUALDEVICE].isAlready) {
get_device_supported_metrics(_autoSchedule->_loadContext[ACTUALDEVICE]);
} else {
get_device_supported_metrics(_autoSchedule->_loadContext[CPU]);
}
}
return all_devices;
} else if (name == ov::hint::model_priority) {
@@ -91,6 +110,24 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const {
const unsigned int defaultNumForTPUT = 4u;
const unsigned int defaultNumForLatency = 1u;
unsigned int real = 0;
if (_autoSchedule->_pCTPUTLoadContext) {
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
unsigned int res = 0u;
auto load_count = _autoSContext->_devicePriorities.size();
for (size_t i = 0; i < load_count; i++) {
try {
res += (_autoSchedule->_pCTPUTLoadContext[i]).executableNetwork->GetMetric(
METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
} catch (const IE::Exception& iie) {
IE_THROW()
<< "Every device used in cumulative mode should "
<< "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
<< "Failed to query the metric for with error:" <<
iie.what();
}
}
return decltype(ov::optimal_number_of_infer_requests)::value_type {res};
}
if (_autoSchedule->_loadContext[ACTUALDEVICE].isAlready) {
real = _autoSchedule->_loadContext[ACTUALDEVICE].
executableNetwork->GetMetric(name).as<unsigned int>();
@@ -181,12 +218,13 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const {
exeDevices.push_back(ExeDevicesString);
execution_devices = decltype(ov::execution_devices)::value_type {exeDevices};
};
if (_autoSContext->_performanceHint == IE::PluginConfigParams::CUMULATIVE_THROUGHPUT) {
try {
execution_devices = _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetMetric(name);
} catch(const IE::Exception&) {
GetExecutionDevices(_autoSchedule->_loadContext[ACTUALDEVICE].workName);
if (_autoSchedule->_pCTPUTLoadContext) {
std::vector<std::string> exeDevices = {};
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
for (auto n : _autoSContext->_devicePriorities) {
exeDevices.push_back(n.deviceName);
}
execution_devices = decltype(ov::execution_devices)::value_type {exeDevices};
} else {
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
for (int i = 0; i < CONTEXTNUM; i++) {
@@ -203,9 +241,13 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const {
return execution_devices;
} else if (name == ov::model_name) {
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
if (_autoSchedule->_pCTPUTLoadContext) {
return _autoSchedule->_pCTPUTLoadContext[0].executableNetwork->GetMetric(name);
} else {
if (_autoSchedule->_loadContext[CPU].isEnabled && _autoSchedule->_loadContext[CPU].isAlready)
return _autoSchedule->_loadContext[CPU].executableNetwork->GetMetric(name);
return _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetMetric(name);
}
} else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
IE_SET_METRIC_RETURN(SUPPORTED_METRICS,
{METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS),

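The GetMetric changes above make ov::optimal_number_of_infer_requests report the sum of each loaded device's optimal request count under CTPUT, so an application can size its request pool with a single query. A caller-side sketch against the 1.x API this file uses (model path and device list are placeholders):

    #include <inference_engine.hpp>

    unsigned int queryOptimalRequests() {
        InferenceEngine::Core ie;
        auto network = ie.ReadNetwork("model.xml");  // placeholder model path
        auto exeNetwork = ie.LoadNetwork(network, "AUTO:GPU,CPU",
            {{CONFIG_KEY(PERFORMANCE_HINT),
              InferenceEngine::PluginConfigParams::CUMULATIVE_THROUGHPUT}});
        // under CTPUT this is the sum over every device the network was loaded on
        return exeNetwork.GetMetric(
            METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
    }
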
View File

@@ -3,7 +3,6 @@
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "auto_schedule.hpp"
#include "async_infer_request.hpp"
#include "auto_executable_network.hpp"
@@ -183,6 +182,27 @@ bool AutoSchedule::selectOtherDevice(const std::string& currentDeviceName) {
return getExecutionDevices(_loadContext[FALLBACKDEVICE].deviceInfo.deviceName.c_str());
}
};
auto removeInferFailDevice = [&](const std::string& deviceName) {
if (_autoSContext->_devicePriorities.size() > 1) {
const auto CurrentDeviceIter =
std::find_if(_autoSContext->_devicePriorities.begin(),
_autoSContext->_devicePriorities.end(),
[=](const DeviceInformation& d) -> bool {
return d.deviceName.find(deviceName) != std::string::npos;
});
if (CurrentDeviceIter != _autoSContext->_devicePriorities.end()) {
_autoSContext->_devicePriorities.erase(CurrentDeviceIter);
return true;
}
}
return false;
};
if (_pCTPUTLoadContext) {
return removeInferFailDevice(currentDeviceName);
}
return getExecutionDevices(currentDeviceName);
}
}
@@ -217,30 +237,40 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
std::list<DeviceInformation> validDevices =
_autoSContext->_plugin->GetValidDevice(_autoSContext->_devicePriorities,
_loadContext[ACTUALDEVICE].networkPrecision);
// When the hint is ctput and there is only one device, the single-device logic is used
if (validDevices.size() == 1) {
// When the hint is ctput and there is only one device, the single-device logic is used instead of
// the MULTI logic
// cannot change _autoSContext->_performanceHint to THROUGHPUT, because GetMetric needs to return CTPUT
_loadContext[ACTUALDEVICE].deviceInfo = validDevices.front();
_loadContext[ACTUALDEVICE].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] =
IE::PluginConfigParams::THROUGHPUT;
isCumulative = false;
} else {
// When the hint is ctput and there are more than one device, the MULTI logic is used
std::string deviceName = "MULTI:";
} else if (validDevices.size() > 1) {
_loadContext[ACTUALDEVICE].isEnabled = false;
_autoSContext->_devicePriorities.clear();
std::copy(std::begin(validDevices),
std::end(validDevices),
std::back_inserter(_autoSContext->_devicePriorities));
// Total number of devices in CTPUT
auto nCTputDeviceNums = validDevices.size();
// Generate contexts for loading each device
_pCTPUTLoadContext.reset(new AutoLoadContext[nCTputDeviceNums]);
int idx = 0;
DeviceInformation cpuDeviceInformation;
for (auto& device : validDevices) {
deviceName += device.deviceName;
deviceName += ((device.deviceName == validDevices.back().deviceName) ? "" : ",");
if (device.deviceName.find("CPU") == std::string::npos) {
_pCTPUTLoadContext[idx].deviceInfo = device;
_pCTPUTLoadContext[idx].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] =
IE::PluginConfigParams::THROUGHPUT;
idx++;
} else {
cpuDeviceInformation = device;
cpuDeviceInformation.config.insert(
{ov::affinity.name(), ov::Any(ov::Affinity::CORE).as<std::string>()});
}
}
if (!cpuDeviceInformation.deviceName.empty()) {
_pCTPUTLoadContext[idx].deviceInfo = cpuDeviceInformation;
_pCTPUTLoadContext[idx].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] =
IE::PluginConfigParams::THROUGHPUT;
}
_loadContext[ACTUALDEVICE].deviceInfo.deviceName = deviceName;
_loadContext[ACTUALDEVICE].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] =
InferenceEngine::PluginConfigParams::CUMULATIVE_THROUGHPUT;
_loadContext[ACTUALDEVICE].deviceInfo.config[CONFIG_KEY(PERF_COUNT)] =
_autoSContext->_needPerfCounters ? InferenceEngine::PluginConfigParams::YES
: InferenceEngine::PluginConfigParams::NO;
if (_autoSContext->_bindBuffer)
_loadContext[ACTUALDEVICE].deviceInfo.config[ov::intel_auto::device_bind_buffer.name()] =
InferenceEngine::PluginConfigParams::YES;
}
} else {
_loadContext[ACTUALDEVICE].deviceInfo =
@@ -248,15 +278,91 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
_loadContext[ACTUALDEVICE].networkPrecision,
_autoSContext->_modelPriority);
}
auto loadDeviceTask = [&](AutoLoadContext* contextPtr,
const std::string& modelPath,
const IE::CNNNetwork& network,
bool isCumulative) {
TryToLoadNetWork(*contextPtr, modelPath, network, isCumulative);
if (contextPtr->isLoadSuccess) {
if (contextPtr->workName.empty()) {
contextPtr->workName = contextPtr->deviceInfo.deviceName;
}
GenerateWorkers(contextPtr->workName, contextPtr->executableNetwork);
// need lock
{
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
_autoSContext->_config.insert(contextPtr->deviceInfo.config.begin(),
contextPtr->deviceInfo.config.end());
}
contextPtr->isAlready = true;
// reloadsuccess flag only for _loadContext[FALLBACKDEVICE]
contextPtr->isReloadSuccess = true;
auto& deviceName = contextPtr->deviceInfo.deviceName;
LOG_INFO_TAG("device:%s loading Network finished", deviceName.c_str());
auto supported_config_keys = _autoSContext->_core->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS))
.as<std::vector<std::string>>();
DEBUG_RUN([this, &contextPtr, &deviceName, &supported_config_keys] {
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
for (const auto& cfg : supported_config_keys) {
try {
LOG_DEBUG_TAG("device:%s, GetConfig:%s=%s",
deviceName.c_str(),
cfg.c_str(),
contextPtr->executableNetwork->GetConfig(cfg).as<std::string>().c_str());
} catch (const IE::Exception&) {
}
}
});
}
// Handle device load failure in the CTPUT case
if (isCumulative && !contextPtr->isLoadSuccess) {
std::string failedDeviceName = contextPtr->deviceInfo.deviceName;
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
const auto DeviceIter =
std::find_if(_autoSContext->_devicePriorities.begin(),
_autoSContext->_devicePriorities.end(),
[&](const DeviceInformation& d) -> bool {
return d.deviceName.find(failedDeviceName) != std::string::npos;
});
// Remove failed device from _devicePriorities
if (DeviceIter != _autoSContext->_devicePriorities.end()) {
_autoSContext->_devicePriorities.erase(DeviceIter);
}
// Remove failed device from ov::device::priorities in config
auto it_prior = _autoSContext->_config.find(ov::device::priorities.name());
if (it_prior != _autoSContext->_config.end()) {
auto priorities = it_prior->second.as<std::string>();
size_t nPos = priorities.find(failedDeviceName);
if (nPos != std::string::npos) {
// if the trailing "," must be removed together with the device name, erase one extra character
size_t nNameLen = (nPos + failedDeviceName.length()) == priorities.length()
? failedDeviceName.length()
: failedDeviceName.length() + 1;
priorities.erase(nPos, nNameLen);
it_prior->second = priorities;
}
}
}
contextPtr->promise.set_value();
// the first load network process finished
std::call_once(_firstLoadOC, [this]() {
_firstLoadPromise.set_value();
});
};
if (_loadContext[ACTUALDEVICE].isEnabled) {
LOG_INFO_TAG("select device:%s", _loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str());
bool isActualDevCPU =
_loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("CPU") !=std::string::npos && !isCumulative;
// if Actual device is CPU or perf_hint is cumulative, disabled _loadContext[CPU], only use _loadContext[ACTUALDEVICE]
if (isActualDevCPU || isCumulative || !_autoSContext->_startupfallback) {
bool isActualDevCPU = _loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("CPU") != std::string::npos;
// if Actual device is CPU or perf_hint is cumulative, disabled _loadContext[CPU], only use
// _loadContext[ACTUALDEVICE]
if (isActualDevCPU || !_autoSContext->_startupfallback) {
_loadContext[CPU].isEnabled = false;
} else {
const auto CPUIter = std::find_if(_autoSContext->_devicePriorities.begin(), _autoSContext->_devicePriorities.end(),
[=](const DeviceInformation& d) -> bool { return d.deviceName.find("CPU") != std::string::npos; });
const auto CPUIter = std::find_if(_autoSContext->_devicePriorities.begin(),
_autoSContext->_devicePriorities.end(),
[](const DeviceInformation& d) -> bool {
return d.deviceName.find("CPU") != std::string::npos;
});
// if have CPU Device, enable _loadContext[CPU]
if (CPUIter != _autoSContext->_devicePriorities.end()) {
_loadContext[CPU].isEnabled = true;
@@ -275,49 +381,24 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
auto* contextPtr = &_loadContext[i];
auto modelPath = _autoSContext->_modelPath;
auto network = _autoSContext->_network;
_loadContext[i].task = [this, contextPtr, modelPath, network, isCumulative]() mutable {
TryToLoadNetWork(*contextPtr, modelPath, network);
if (contextPtr->isLoadSuccess) {
if (contextPtr->workName.empty()) {
contextPtr->workName = contextPtr->deviceInfo.deviceName;
}
if (!isCumulative)
GenerateWorkers(contextPtr->workName, contextPtr->executableNetwork);
//need lock
{
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
_autoSContext->_config.insert(contextPtr->deviceInfo.config.begin(), contextPtr->deviceInfo.config.end());
}
contextPtr->isAlready = true;
// reloadsuccess flag only for _loadContext[FALLBACKDEVICE]
contextPtr->isReloadSuccess = true;
auto& deviceName = contextPtr->deviceInfo.deviceName;
LOG_INFO_TAG("device:%s loading Network finished", deviceName.c_str());
if (!isCumulative) {
auto supported_config_keys =
_autoSContext->_core->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS))
.as<std::vector<std::string>>();
DEBUG_RUN([this, &contextPtr, &deviceName, &supported_config_keys] {
std::lock_guard<std::mutex> lock(_autoSContext->_confMutex);
for (const auto& cfg : supported_config_keys) {
try {
LOG_DEBUG_TAG(
"device:%s, GetConfig:%s=%s",
deviceName.c_str(),
cfg.c_str(),
contextPtr->executableNetwork->GetConfig(cfg).as<std::string>().c_str());
} catch (const IE::Exception&) {
_loadContext[i].task = std::bind(loadDeviceTask, contextPtr, modelPath, network, isCumulative);
}
}
});
}
std::vector<Task> otherDevicesloads;
std::vector<Task> cpuLoads;
if (_pCTPUTLoadContext) {
for (size_t i = 0; i < _autoSContext->_devicePriorities.size(); i++) {
auto* contextPtr = &_pCTPUTLoadContext[i];
auto modelPath = _autoSContext->_modelPath;
auto network = _autoSContext->_network;
_pCTPUTLoadContext[i].task = std::bind(loadDeviceTask, contextPtr, modelPath, network, isCumulative);
if (i == _autoSContext->_devicePriorities.size() - 1 &&
_pCTPUTLoadContext[i].deviceInfo.deviceName.find("CPU") != std::string::npos) {
cpuLoads.push_back(_pCTPUTLoadContext[i].task);
} else {
otherDevicesloads.push_back(_pCTPUTLoadContext[i].task);
}
contextPtr->promise.set_value();
// the first load network process finished
std::call_once(_firstLoadOC, [this]() {
_firstLoadPromise.set_value();
});
};
}
}
OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin,
@@ -350,7 +431,6 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
_loadContext[CPU].future.wait();
// clean up helper infer requests
// first, wait for all the remaining requests to finish
if (!_autoSContext->_runtimeFallback) {
for (auto& iter : _workerRequests["CPU_HELP"]) {
try {
iter._inferRequest._ptr->Wait(IE::InferRequest::WaitMode::RESULT_READY);
@@ -358,7 +438,6 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
LOG_DEBUG_TAG("No infer results expected, infer in CPU_HELP throw some errors: %s", iie.what());
}
}
}
// late enough to check the idle queue now
// second, check the idle queue if all requests are in place
size_t destroynum = 0;
@@ -410,15 +489,39 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
_inferPipelineTasksDeviceSpecific[device.deviceName] = nullptr;
}
_loadContext[ACTUALDEVICE].task();
} else {
if (_pCTPUTLoadContext) {
for (auto&& device : _autoSContext->_devicePriorities) {
// initialize containers before running the async tasks; if left uninitialized, inference will hang
_idleWorkerRequests[device.deviceName];
_workerRequests[device.deviceName];
_inferPipelineTasksDeviceSpecific[device.deviceName] = nullptr;
}
_executor = _autoSContext->_plugin->executorManager()->getIdleCPUStreamsExecutor(IStreamsExecutor::Config{
"CTPUTDeviceAsyncLoad",
static_cast<int>(std::thread::hardware_concurrency()) /* max possible #streams*/,
0 /*default threads per stream, workaround for ticket 62376*/,
IStreamsExecutor::ThreadBindingType::NONE});
// load devices other than CPU first
if (otherDevicesloads.size() > 0) {
// Wait for the devices other than CPU to load the network
_executor->runAndWait(otherDevicesloads);
}
// Finally load the CPU
if (cpuLoads.size() > 0) {
// Wait for CPU to load the network
_executor->runAndWait(cpuLoads);
}
} else {
// only one device needs to load the network, so there is no need to load it asynchronously
_loadContext[ACTUALDEVICE].task();
_passthroughExeNet = _loadContext[ACTUALDEVICE].executableNetwork;
}
}
WaitFirstNetworkReady();
}
void AutoSchedule::TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network) {
void AutoSchedule::TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network, bool isCumulative) {
auto& device = context.deviceInfo.deviceName;
auto& deviceConfig = context.deviceInfo.config;
auto& deviceList = context.metaDevices;
@@ -458,7 +561,7 @@ void AutoSchedule::TryToLoadNetWork(AutoLoadContext& context, const std::string&
context.errMessage += device + ":" + e.what();
context.isLoadSuccess = false;
}
if (context.isLoadSuccess || curDevIsCPU) {
if (context.isLoadSuccess || curDevIsCPU || isCumulative) {
return;
}
// need to reload network, unregister it's priority
@@ -512,7 +615,7 @@ void AutoSchedule::TryToLoadNetWork(AutoLoadContext& context, const std::string&
}
LOG_DEBUG_TAG("try to load %s", context.deviceInfo.deviceName.c_str());
// try to load this candidate device
TryToLoadNetWork(context, modelPath, network);
TryToLoadNetWork(context, modelPath, network, isCumulative);
}
void AutoSchedule::WaitFirstNetworkReady() {
@@ -542,6 +645,20 @@ void AutoSchedule::WaitFirstNetworkReady() {
LOG_ERROR_TAG("load failed, %s", _loadContext[i].errMessage.c_str());
}
}
// devices loaded successfully in CTPUT
if (_pCTPUTLoadContext) {
int nLoadSucNums = 0;
for (size_t i = 0; i < _autoSContext->_devicePriorities.size(); i++) {
// check if device loaded successfully
if (_pCTPUTLoadContext[i].isAlready) {
nLoadSucNums++;
}
}
// one or more devices loaded successfully
if (nLoadSucNums > 0) {
return;
}
}
IE_THROW() << GetLogTag() << "load all devices failed";
}
@@ -560,6 +677,10 @@ bool AutoSchedule::ScheduleToWorkerInferRequest(IE::Task inferPipelineTask, Devi
std::vector<DeviceInformation> devices;
// AUTO work mode
if (!preferred_device.empty()) {
if (_pCTPUTLoadContext) {
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
devices = _autoSContext->_devicePriorities;
} else {
// if the device needed by customer is not ready, need to wait for it
WaitActualNetworkReady();
// the preferred_device should be the selected device in AUTO work mode
@@ -567,6 +688,14 @@ bool AutoSchedule::ScheduleToWorkerInferRequest(IE::Task inferPipelineTask, Devi
IE_THROW(NotFound) << "The preferred device should be the selected device";
}
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
}
} else {
if (_pCTPUTLoadContext) {
// Devices that fail infer will be removed from the priority list in the callback, need lock here
std::lock_guard<std::mutex> lock(_autoSContext->_fallbackMutex);
for (size_t i = 0; i < _autoSContext->_devicePriorities.size(); i++) {
devices.push_back(_autoSContext->_devicePriorities[i]);
}
} else {
// _acceleratorDevice could be the same as _cpuDevice, such as AUTO:CPU
if (_loadContext[FALLBACKDEVICE].isAlready) {
@@ -583,6 +712,10 @@ bool AutoSchedule::ScheduleToWorkerInferRequest(IE::Task inferPipelineTask, Devi
}
}
}
}
if (devices.size() == 0) {
IE_THROW(GeneralError) << "No device to run pipeline task";
}
for (auto&& device : devices) {
if (!preferred_device.empty() && (device.deviceName != preferred_device)) {
continue;
@@ -644,27 +777,12 @@ IInferPtr AutoSchedule::CreateInferRequest() {
if (!syncRequestImpl)
syncRequestImpl = CreateInferRequestImpl(execNetwork->_networkInputs, execNetwork->_networkOutputs);
syncRequestImpl->setPointerToExecutableNetworkInternal(execNetwork);
bool isCumulative = (_autoSContext->_performanceHint == IE::PluginConfigParams::CUMULATIVE_THROUGHPUT) ? true : false;
bool isCTPUTSingleDevice =
isCumulative && _loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("MULTI:") == std::string::npos ? true
: false;
if ((_passthroughExeNet && !isCumulative) || isCTPUTSingleDevice) {
std::string perfmode;
try {
perfmode = _passthroughExeNet->GetConfig(
CONFIG_KEY(PERFORMANCE_HINT)).as<std::string>();
} catch (const IE::Exception&) {
LOG_INFO("query perf hint from passthrough network failed");
}
if (_autoSContext->_batchingDisabled || perfmode != CONFIG_VALUE(THROUGHPUT)) {
syncRequestImpl->setPointerToSo(_passthroughExeNet._so);
} else {
if (_passthroughExeNet) {
auto so = _passthroughExeNet._ptr->GetPointerToSo();
// Get the _so from the passthrough executable network when the batch plugin is disabled.
if (!so)
so = _passthroughExeNet._so;
syncRequestImpl->setPointerToSo(so);
}
} else if (std::static_pointer_cast<MultiDeviceInferRequest>(syncRequestImpl)->GetSharedRequest()) {
// cumulative case, load to MULTI:*
auto sharedMultiRequest = std::static_pointer_cast<MultiDeviceInferRequest>(syncRequestImpl)->GetSharedRequest();

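The init() rework above batches the CTPUT loads: every non-CPU device is loaded first and waited on, then CPU is loaded last, plausibly to keep CPU cores free while the other plugins compile. A condensed, generic sketch of that ordering, with std::async standing in for the plugin's streams executor (device names are hypothetical):

    #include <future>
    #include <iostream>
    #include <string>
    #include <vector>

    // Mirror the CTPUT load ordering: launch every non-CPU load in parallel,
    // wait for them all, then run the CPU load last, matching the scheduler above.
    void loadAll(const std::vector<std::string>& devices) {
        std::vector<std::future<void>> others;
        std::string cpu;
        for (const auto& d : devices) {
            if (d.find("CPU") != std::string::npos)
                cpu = d;  // defer the CPU load, as the scheduler above does
            else
                others.emplace_back(std::async(std::launch::async, [d] {
                    std::cout << "loading on " << d << "\n";  // placeholder load
                }));
        }
        for (auto& f : others)
            f.wait();  // wait for the devices other than CPU
        if (!cpu.empty())
            std::cout << "loading on " << cpu << "\n";  // CPU load runs last
    }
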
View File

@@ -50,6 +50,7 @@ public:
public:
AutoLoadContext _loadContext[CONTEXTNUM];
std::unique_ptr<AutoLoadContext[]> _pCTPUTLoadContext = nullptr;
protected:
void GenerateWorkers(const std::string& device, const SoExecNetwork& executableNetwork) override;
@@ -60,7 +61,7 @@ protected:
private:
void WaitFirstNetworkReady();
void TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network);
void TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network, bool isCumulative);
bool selectOtherDevice(const std::string& currentDeviceName);
IE::Task releaseActualdeviceTask;

View File

@@ -307,22 +307,11 @@ IInferPtr MultiSchedule::CreateInferRequest() {
syncRequestImpl = CreateInferRequestImpl(execNetwork->_networkInputs, execNetwork->_networkOutputs);
syncRequestImpl->setPointerToExecutableNetworkInternal(execNetwork);
if (_passthroughExeNet) {
std::string perfmode;
try {
perfmode = _passthroughExeNet->GetConfig(
CONFIG_KEY(PERFORMANCE_HINT)).as<std::string>();
} catch (const IE::Exception&) {
LOG_INFO("query perf hint from passthrough network failed");
}
if (_multiSContext->_batchingDisabled || perfmode != CONFIG_VALUE(THROUGHPUT)) {
syncRequestImpl->setPointerToSo(_passthroughExeNet._so);
} else {
auto so = _passthroughExeNet._ptr->GetPointerToSo();
// Get the _so from the passthrough executable network when the batch plugin is disabled.
if (!so)
so = _passthroughExeNet._so;
syncRequestImpl->setPointerToSo(so);
}
} else if (_multiSContext->_bindBuffer) {
auto sharedRequest = std::static_pointer_cast<MultiDeviceInferRequest>(syncRequestImpl)->GetSharedRequest();
if (sharedRequest._ptr->getPointerToSo())

View File

@@ -6,7 +6,7 @@
#include <string>
#include <vector>
#include <thread>
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph/function.hpp"
#include "ngraph_functions/subgraph_builders.hpp"
@@ -90,9 +90,11 @@ protected:
} else {
m_extList.push_back(ext);
}
std::replace(test_name.begin(), test_name.end(), '/', '_');
std::replace(test_name.begin(), test_name.end(), '\\', '_');
cache_path = "LoadNetwork" + test_name + "_cache";
auto hash = std::hash<std::string>()(test_name);
std::stringstream ss;
ss << std::this_thread::get_id();
cache_path = "LoadNetwork" + std::to_string(hash) + "_"
+ ss.str() + "_" + GetTimestamp() + "_cache";
}
void TearDown() override {
APIBaseTest::TearDown();

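The fixture change above drops the raw test name, which may contain '/' or '\\' (illegal in Windows paths), in favor of a hash, and appends the thread id and a timestamp so concurrent runs get distinct cache directories. A standalone sketch of the scheme; GetTimestamp() in the fixture is replaced here by a chrono-based stand-in:

    #include <chrono>
    #include <functional>
    #include <sstream>
    #include <string>
    #include <thread>

    // Build a filesystem-safe, collision-resistant cache directory name:
    // hashing the test name keeps '/' and '\' out of the path, and the
    // thread id plus timestamp separate concurrent test runs.
    std::string makeCachePath(const std::string& testName) {
        auto hash = std::hash<std::string>()(testName);
        std::stringstream ss;
        ss << std::this_thread::get_id();
        auto ts = std::chrono::steady_clock::now().time_since_epoch().count();
        return "LoadNetwork" + std::to_string(hash) + "_" + ss.str() + "_" +
               std::to_string(ts) + "_cache";
    }
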
View File

@@ -515,9 +515,11 @@ void CompiledKernelsCacheTest::SetUp() {
} else {
m_extList.push_back(ext);
}
std::replace(test_name.begin(), test_name.end(), '/', '_');
std::replace(test_name.begin(), test_name.end(), '\\', '_');
cache_path = "compiledModel" + test_name + "_cache";
auto hash = std::hash<std::string>()(test_name);
std::stringstream ss;
ss << std::this_thread::get_id();
cache_path = "compiledModel" + std::to_string(hash) + "_"
+ ss.str() + "_" + GetTimestamp() + "_cache";
}
void CompiledKernelsCacheTest::TearDown() {

View File

@@ -200,12 +200,6 @@ TEST_P(LoadNetworkWithCTPUTMockTest, CTPUTSingleDevLogicTest) {
::testing::Matcher<const std::map<std::string, std::string>&>(
ComparePerfHint(InferenceEngine::PluginConfigParams::THROUGHPUT))))
.Times(1);
// no MULTI logic to be called
EXPECT_CALL(*core,
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>("MULTI:" + targetDevice),
::testing::Matcher<const std::map<std::string, std::string>&>(_)))
.Times(0);
// if target device only has GPU, no CPU helper to be called
if (targetDevice.find("GPU") != std::string::npos) {
EXPECT_CALL(*core,
@@ -220,14 +214,14 @@
for (auto& deviceName : targetDevices) {
targetDev += deviceName;
targetDev += ((deviceName == targetDevices.back()) ? "" : ",");
}
config.insert({InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, targetDev});
// Call MULTI logic
EXPECT_CALL(*core,
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>("MULTI:" + targetDev),
::testing::Matcher<const std::map<std::string, std::string>&>(_)))
::testing::Matcher<const std::string&>(deviceName),
::testing::Matcher<const std::map<std::string, std::string>&>(
ComparePerfHint(InferenceEngine::PluginConfigParams::THROUGHPUT))))
.Times(1);
}
config.insert({InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, targetDev});
// no CPU helper to be called
EXPECT_CALL(*core,
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),

View File

@@ -230,6 +230,8 @@ public:
}
};
using AutoCTPUTRuntimeFallback = AutoRuntimeFallback;
TEST_P(AutoRuntimeFallback, releaseResource) {
std::string targetDev;
std::vector<std::tuple<std::string, bool>> targetDevices;
@@ -362,3 +364,93 @@ const std::vector<ConfigParams> testConfigs = {
INSTANTIATE_TEST_SUITE_P(smoke_AutoRuntimeFallback, AutoRuntimeFallback,
::testing::ValuesIn(testConfigs),
AutoRuntimeFallback::getTestCaseName);
TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) {
std::string targetDev;
std::vector<std::tuple<std::string, bool>> targetDevices; // std::tuple<deviceName, whether infer throws an exception>
int loadNetworkNum;
bool enableRumtimeFallback;
bool expectThrow;
bool loadNetworkFail;
bool generateWorkersFail;
std::tie(targetDevices, loadNetworkNum, enableRumtimeFallback, expectThrow, loadNetworkFail, generateWorkersFail) = this->GetParam();
if (loadNetworkFail) {
ON_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>(StrEq("GPU.1")),
::testing::Matcher<const Config&>(_))).WillByDefault(Throw(InferenceEngine::GeneralError{""}));
}
for (auto& deviceInfo : targetDevices) {
std::string deviceName;
bool ifThrow;
std::tie(deviceName, ifThrow) = deviceInfo;
targetDev += deviceName;
targetDev += ((deviceInfo == targetDevices.back()) ? "" : ",");
if (deviceName == "CPU") {
mockInferrequest = std::make_shared<mockAsyncInferRequest>(
inferReqInternal, mockExecutor, nullptr, ifThrow);
ON_CALL(*mockIExeNet.get(), CreateInferRequest()).WillByDefault(Return(mockInferrequest));
} else if (deviceName == "GPU.0") {
mockInferrequestGPU_0 = std::make_shared<mockAsyncInferRequest>(
inferReqInternalGPU_0, mockExecutorGPU_0, nullptr, ifThrow);
ON_CALL(*mockIExeNetGPU_0.get(), CreateInferRequest()).WillByDefault(InvokeWithoutArgs([this]() {
std::this_thread::sleep_for(std::chrono::milliseconds(0));
return mockInferrequestGPU_0; }));
} else if (deviceName == "GPU.1") {
if (generateWorkersFail) {
mockInferrequestGPU_1 =
std::make_shared<mockAsyncInferRequest>(inferReqInternalGPU_1, mockExecutorGPU_1, nullptr, ifThrow);
ON_CALL(*mockIExeNetGPU_1.get(), CreateInferRequest())
.WillByDefault(Throw(InferenceEngine::GeneralError{""}));
} else {
mockInferrequestGPU_1 =
std::make_shared<mockAsyncInferRequest>(inferReqInternalGPU_1, mockExecutorGPU_1, nullptr, ifThrow);
ON_CALL(*mockIExeNetGPU_1.get(), CreateInferRequest()).WillByDefault(InvokeWithoutArgs([this]() {
std::this_thread::sleep_for(std::chrono::milliseconds(0));
return mockInferrequestGPU_1;
}));
}
} else {
return;
}
}
plugin->SetName("AUTO");
config.insert({InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, targetDev});
config.insert({InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT,
InferenceEngine::PluginConfigParams::CUMULATIVE_THROUGHPUT});
if (!enableRumtimeFallback) {
config.insert({{"ENABLE_RUNTIME_FALLBACK", "NO"}});
}
EXPECT_CALL(*core,
LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>(_),
::testing::Matcher<const std::map<std::string, std::string>&>(_)))
.Times(loadNetworkNum);
std::shared_ptr<InferenceEngine::IExecutableNetworkInternal> exeNetwork;
std::shared_ptr<IInferRequestInternal> infer_request;
ASSERT_NO_THROW(exeNetwork = plugin->LoadExeNetworkImpl(cnnNet, config));
ASSERT_NO_THROW(infer_request = exeNetwork->CreateInferRequest());
if (expectThrow) {
EXPECT_THROW(infer_request->Infer(), IE::Exception);
} else {
ASSERT_NO_THROW(infer_request->Infer());
}
}
// ConfigParams: targetDevices(deviceName, whether infer throws an exception), loadNetworkNum, enableRumtimeFallback,
// expectThrow, loadNetworkFail, generateWorkersFail
const std::vector<ConfigParams> testCtputConfigs = {
ConfigParams{{{"CPU", false}, {"GPU.0", true}, {"GPU.1", true}}, 3, true, false, false, false},
ConfigParams{{{"CPU", true}, {"GPU.0", false}, {"GPU.1", true}}, 3, true, false, false, false},
ConfigParams{{{"CPU", true}, {"GPU.0", true}, {"GPU.1", true}}, 3, true, true, false, false},
// disable RumtimeFallback
ConfigParams{{{"CPU", false}, {"GPU.0", false}, {"GPU.1", false}}, 3, false, false, false, false},
ConfigParams{{{"CPU", true}, {"GPU.0", false}, {"GPU.1", false}}, 3, false, true, false, false},
};
INSTANTIATE_TEST_SUITE_P(smoke_AutoCTPUTRuntimeFallback,
AutoCTPUTRuntimeFallback,
::testing::ValuesIn(testCtputConfigs),
AutoCTPUTRuntimeFallback::getTestCaseName);