diff --git a/src/plugins/auto/auto_executable_network.cpp b/src/plugins/auto/auto_executable_network.cpp index 48de040c398..cd83a1beca9 100644 --- a/src/plugins/auto/auto_executable_network.cpp +++ b/src/plugins/auto/auto_executable_network.cpp @@ -64,9 +64,8 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const { return decltype(ov::device::priorities)::value_type {value->second.as()}; } else if (name == ov::device::properties) { ov::AnyMap all_devices = {}; - if (_autoSchedule->_loadContext[ACTUALDEVICE].isAlready) { - ov::AnyMap device_properties = {}; - auto& context = _autoSchedule->_loadContext[ACTUALDEVICE]; + auto get_device_supported_metrics = [&all_devices] (const AutoLoadContext& context) { + ov::AnyMap device_properties = {}; auto device_supported_metrics = context.executableNetwork->GetMetric(METRIC_KEY(SUPPORTED_METRICS)); for (auto&& property_name : device_supported_metrics.as>()) { device_properties[property_name] = context.executableNetwork->GetMetric(property_name); @@ -76,6 +75,26 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const { device_properties[property_name] = context.executableNetwork->GetConfig(property_name); } all_devices[context.deviceInfo.deviceName] = device_properties; + }; + if (_autoSchedule->_pCTPUTLoadContext) { + // need lock for inference failure + std::lock_guard lock(_autoSContext->_fallbackMutex); + auto load_count = _autoSContext->_devicePriorities.size(); + for (size_t i = 0; i < load_count; i++) + get_device_supported_metrics(_autoSchedule->_pCTPUTLoadContext[i]); + } else { + { + std::lock_guard lock(_autoSContext->_fallbackMutex); + if (_autoSchedule->_loadContext[FALLBACKDEVICE].isAlready) { + get_device_supported_metrics(_autoSchedule->_loadContext[FALLBACKDEVICE]); + } + } + std::lock_guard lock(_autoSContext->_confMutex); + if (_autoSchedule->_loadContext[ACTUALDEVICE].isAlready) { + get_device_supported_metrics(_autoSchedule->_loadContext[ACTUALDEVICE]); + } else { + get_device_supported_metrics(_autoSchedule->_loadContext[CPU]); + } } return all_devices; } else if (name == ov::hint::model_priority) { @@ -91,6 +110,24 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const { const unsigned int defaultNumForTPUT = 4u; const unsigned int defaultNumForLatency = 1u; unsigned int real = 0; + if (_autoSchedule->_pCTPUTLoadContext) { + std::lock_guard lock(_autoSContext->_fallbackMutex); + unsigned int res = 0u; + auto load_count = _autoSContext->_devicePriorities.size(); + for (size_t i = 0; i < load_count; i++) { + try { + res += (_autoSchedule->_pCTPUTLoadContext[i]).executableNetwork->GetMetric( + METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as(); + } catch (const IE::Exception& iie) { + IE_THROW() + << "Every device used in cumulative mode should " + << "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. " + << "Failed to query the metric for with error:" << + iie.what(); + } + } + return decltype(ov::optimal_number_of_infer_requests)::value_type {res}; + } if (_autoSchedule->_loadContext[ACTUALDEVICE].isAlready) { real = _autoSchedule->_loadContext[ACTUALDEVICE]. executableNetwork->GetMetric(name).as(); @@ -181,12 +218,13 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const { exeDevices.push_back(ExeDevicesString); execution_devices = decltype(ov::execution_devices)::value_type {exeDevices}; }; - if (_autoSContext->_performanceHint == IE::PluginConfigParams::CUMULATIVE_THROUGHPUT) { - try { - execution_devices = _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetMetric(name); - } catch(const IE::Exception&) { - GetExecutionDevices(_autoSchedule->_loadContext[ACTUALDEVICE].workName); + if (_autoSchedule->_pCTPUTLoadContext) { + std::vector exeDevices = {}; + std::lock_guard lock(_autoSContext->_confMutex); + for (auto n : _autoSContext->_devicePriorities) { + exeDevices.push_back(n.deviceName); } + execution_devices = decltype(ov::execution_devices)::value_type {exeDevices}; } else { std::lock_guard lock(_autoSContext->_confMutex); for (int i = 0; i < CONTEXTNUM; i++) { @@ -203,9 +241,13 @@ IE::Parameter AutoExecutableNetwork::GetMetric(const std::string& name) const { return execution_devices; } else if (name == ov::model_name) { std::lock_guard lock(_autoSContext->_confMutex); - if (_autoSchedule->_loadContext[CPU].isEnabled && _autoSchedule->_loadContext[CPU].isAlready) - return _autoSchedule->_loadContext[CPU].executableNetwork->GetMetric(name); - return _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetMetric(name); + if (_autoSchedule->_pCTPUTLoadContext) { + return _autoSchedule->_pCTPUTLoadContext[0].executableNetwork->GetMetric(name); + } else { + if (_autoSchedule->_loadContext[CPU].isEnabled && _autoSchedule->_loadContext[CPU].isAlready) + return _autoSchedule->_loadContext[CPU].executableNetwork->GetMetric(name); + return _autoSchedule->_loadContext[ACTUALDEVICE].executableNetwork->GetMetric(name); + } } else if (name == METRIC_KEY(SUPPORTED_METRICS)) { IE_SET_METRIC_RETURN(SUPPORTED_METRICS, {METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS), diff --git a/src/plugins/auto/auto_schedule.cpp b/src/plugins/auto/auto_schedule.cpp index c645334c2d3..7e5a85809b8 100644 --- a/src/plugins/auto/auto_schedule.cpp +++ b/src/plugins/auto/auto_schedule.cpp @@ -3,7 +3,6 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// - #include "auto_schedule.hpp" #include "async_infer_request.hpp" #include "auto_executable_network.hpp" @@ -183,6 +182,27 @@ bool AutoSchedule::selectOtherDevice(const std::string& currentDeviceName) { return getExecutionDevices(_loadContext[FALLBACKDEVICE].deviceInfo.deviceName.c_str()); } }; + + auto removeInferFailDevice = [&](const std::string& deviceName) { + if (_autoSContext->_devicePriorities.size() > 1) { + const auto CurrentDeviceIter = + std::find_if(_autoSContext->_devicePriorities.begin(), + _autoSContext->_devicePriorities.end(), + [=](const DeviceInformation& d) -> bool { + return d.deviceName.find(deviceName) != std::string::npos; + }); + if (CurrentDeviceIter != _autoSContext->_devicePriorities.end()) { + _autoSContext->_devicePriorities.erase(CurrentDeviceIter); + return true; + } + } + return false; + }; + + if (_pCTPUTLoadContext) { + return removeInferFailDevice(currentDeviceName); + } + return getExecutionDevices(currentDeviceName); } } @@ -217,30 +237,40 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) { std::list validDevices = _autoSContext->_plugin->GetValidDevice(_autoSContext->_devicePriorities, _loadContext[ACTUALDEVICE].networkPrecision); + // When the hint is ctput and there is only one device, the single-device logic is used if (validDevices.size() == 1) { - // When the hint is ctput and there is only one device, the single-device logic is used instead of - // the MULTI logic - // can not change _autoSContext->_performanceHint to THROUGHPUT, because GetMetric needs to return CTPUT _loadContext[ACTUALDEVICE].deviceInfo = validDevices.front(); _loadContext[ACTUALDEVICE].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] = IE::PluginConfigParams::THROUGHPUT; - isCumulative = false; - } else { - // When the hint is ctput and there are more than one device, the MULTI logic is used - std::string deviceName = "MULTI:"; + } else if (validDevices.size() > 1) { + _loadContext[ACTUALDEVICE].isEnabled = false; + _autoSContext->_devicePriorities.clear(); + std::copy(std::begin(validDevices), + std::end(validDevices), + std::back_inserter(_autoSContext->_devicePriorities)); + // Total number of devices in CTPUT + auto nCTputDeviceNums = validDevices.size(); + // Generate contexts for loading each device + _pCTPUTLoadContext.reset(new AutoLoadContext[nCTputDeviceNums]); + int idx = 0; + DeviceInformation cpuDeviceInformation; for (auto& device : validDevices) { - deviceName += device.deviceName; - deviceName += ((device.deviceName == validDevices.back().deviceName) ? "" : ","); + if (device.deviceName.find("CPU") == std::string::npos) { + _pCTPUTLoadContext[idx].deviceInfo = device; + _pCTPUTLoadContext[idx].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] = + IE::PluginConfigParams::THROUGHPUT; + idx++; + } else { + cpuDeviceInformation = device; + cpuDeviceInformation.config.insert( + {ov::affinity.name(), ov::Any(ov::Affinity::CORE).as()}); + } + } + if (!cpuDeviceInformation.deviceName.empty()) { + _pCTPUTLoadContext[idx].deviceInfo = cpuDeviceInformation; + _pCTPUTLoadContext[idx].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] = + IE::PluginConfigParams::THROUGHPUT; } - _loadContext[ACTUALDEVICE].deviceInfo.deviceName = deviceName; - _loadContext[ACTUALDEVICE].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] = - InferenceEngine::PluginConfigParams::CUMULATIVE_THROUGHPUT; - _loadContext[ACTUALDEVICE].deviceInfo.config[CONFIG_KEY(PERF_COUNT)] = - _autoSContext->_needPerfCounters ? InferenceEngine::PluginConfigParams::YES - : InferenceEngine::PluginConfigParams::NO; - if (_autoSContext->_bindBuffer) - _loadContext[ACTUALDEVICE].deviceInfo.config[ov::intel_auto::device_bind_buffer.name()] = - InferenceEngine::PluginConfigParams::YES; } } else { _loadContext[ACTUALDEVICE].deviceInfo = @@ -248,76 +278,127 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) { _loadContext[ACTUALDEVICE].networkPrecision, _autoSContext->_modelPriority); } - LOG_INFO_TAG("select device:%s", _loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str()); - bool isActualDevCPU = - _loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("CPU") !=std::string::npos && !isCumulative; - // if Actual device is CPU or perf_hint is cumulative, disabled _loadContext[CPU], only use _loadContext[ACTUALDEVICE] - if (isActualDevCPU || isCumulative || !_autoSContext->_startupfallback) { - _loadContext[CPU].isEnabled = false; - } else { - const auto CPUIter = std::find_if(_autoSContext->_devicePriorities.begin(), _autoSContext->_devicePriorities.end(), - [=](const DeviceInformation& d) -> bool { return d.deviceName.find("CPU") != std::string::npos; }); - // if have CPU Device, enable _loadContext[CPU] - if (CPUIter != _autoSContext->_devicePriorities.end()) { - _loadContext[CPU].isEnabled = true; - _loadContext[CPU].deviceInfo = *CPUIter; - _loadContext[CPU].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] = IE::PluginConfigParams::LATENCY; - _loadContext[CPU].workName = "CPU_HELP"; - LOG_INFO_TAG("will load CPU for accelerator"); - } else { - _loadContext[CPU].isEnabled = false; - } - } - // initialize the rest members of load context - for (int i = 0; i < CONTEXTNUM; i++) { - if (_loadContext[i].isEnabled) { - _loadContext[i].future = _loadContext[i].promise.get_future(); - auto* contextPtr = &_loadContext[i]; - auto modelPath = _autoSContext->_modelPath; - auto network = _autoSContext->_network; - _loadContext[i].task = [this, contextPtr, modelPath, network, isCumulative]() mutable { - TryToLoadNetWork(*contextPtr, modelPath, network); - if (contextPtr->isLoadSuccess) { - if (contextPtr->workName.empty()) { - contextPtr->workName = contextPtr->deviceInfo.deviceName; - } - if (!isCumulative) - GenerateWorkers(contextPtr->workName, contextPtr->executableNetwork); - //need lock - { - std::lock_guard lock(_autoSContext->_confMutex); - _autoSContext->_config.insert(contextPtr->deviceInfo.config.begin(), contextPtr->deviceInfo.config.end()); - } - contextPtr->isAlready = true; - // reloadsuccess flag only for _loadContext[FALLBACKDEVICE] - contextPtr->isReloadSuccess = true; - auto& deviceName = contextPtr->deviceInfo.deviceName; - LOG_INFO_TAG("device:%s loading Network finished", deviceName.c_str()); - if (!isCumulative) { - auto supported_config_keys = - _autoSContext->_core->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS)) - .as>(); - DEBUG_RUN([this, &contextPtr, &deviceName, &supported_config_keys] { - std::lock_guard lock(_autoSContext->_confMutex); - for (const auto& cfg : supported_config_keys) { - try { - LOG_DEBUG_TAG( - "device:%s, GetConfig:%s=%s", - deviceName.c_str(), - cfg.c_str(), - contextPtr->executableNetwork->GetConfig(cfg).as().c_str()); - } catch (const IE::Exception&) { - } - } - }); + + auto loadDeviceTask = [&](AutoLoadContext* contextPtr, + const std::string& modelPath, + const IE::CNNNetwork& network, + bool isCumulative) { + TryToLoadNetWork(*contextPtr, modelPath, network, isCumulative); + if (contextPtr->isLoadSuccess) { + if (contextPtr->workName.empty()) { + contextPtr->workName = contextPtr->deviceInfo.deviceName; + } + GenerateWorkers(contextPtr->workName, contextPtr->executableNetwork); + // need lock + { + std::lock_guard lock(_autoSContext->_confMutex); + _autoSContext->_config.insert(contextPtr->deviceInfo.config.begin(), + contextPtr->deviceInfo.config.end()); + } + contextPtr->isAlready = true; + // reloadsuccess flag only for _loadContext[FALLBACKDEVICE] + contextPtr->isReloadSuccess = true; + auto& deviceName = contextPtr->deviceInfo.deviceName; + LOG_INFO_TAG("device:%s loading Network finished", deviceName.c_str()); + auto supported_config_keys = _autoSContext->_core->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS)) + .as>(); + DEBUG_RUN([this, &contextPtr, &deviceName, &supported_config_keys] { + std::lock_guard lock(_autoSContext->_confMutex); + for (const auto& cfg : supported_config_keys) { + try { + LOG_DEBUG_TAG("device:%s, GetConfig:%s=%s", + deviceName.c_str(), + cfg.c_str(), + contextPtr->executableNetwork->GetConfig(cfg).as().c_str()); + } catch (const IE::Exception&) { } } - contextPtr->promise.set_value(); - // the first load network process finished - std::call_once(_firstLoadOC, [this]() { - _firstLoadPromise.set_value(); - }); - }; + }); + } + // Handle device load failure in case of ctput + if (isCumulative && !contextPtr->isLoadSuccess) { + std::string failedDeviceName = contextPtr->deviceInfo.deviceName; + std::lock_guard lock(_autoSContext->_confMutex); + const auto DeviceIter = + std::find_if(_autoSContext->_devicePriorities.begin(), + _autoSContext->_devicePriorities.end(), + [&](const DeviceInformation& d) -> bool { + return d.deviceName.find(failedDeviceName) != std::string::npos; + }); + // Remove failed device from _devicePriorities + if (DeviceIter != _autoSContext->_devicePriorities.end()) { + _autoSContext->_devicePriorities.erase(DeviceIter); + } + // Remove failed device from ov::device::priorities in config + auto it_prior = _autoSContext->_config.find(ov::device::priorities.name()); + if (it_prior != _autoSContext->_config.end()) { + auto priorities = it_prior->second.as(); + size_t nPos = priorities.find(failedDeviceName); + if (nPos != std::string::npos) { + // If need to delete failed device and "," then length plus 1 + size_t nNameLen = (nPos + failedDeviceName.length()) == priorities.length() + ? failedDeviceName.length() + : failedDeviceName.length() + 1; + priorities.erase(nPos, nNameLen); + it_prior->second = priorities; + } + } + } + contextPtr->promise.set_value(); + // the first load network process finished + std::call_once(_firstLoadOC, [this]() { + _firstLoadPromise.set_value(); + }); + }; + if (_loadContext[ACTUALDEVICE].isEnabled) { + LOG_INFO_TAG("select device:%s", _loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str()); + bool isActualDevCPU = _loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("CPU") != std::string::npos; + // if Actual device is CPU or perf_hint is cumulative, disabled _loadContext[CPU], only use + // _loadContext[ACTUALDEVICE] + if (isActualDevCPU || !_autoSContext->_startupfallback) { + _loadContext[CPU].isEnabled = false; + } else { + const auto CPUIter = std::find_if(_autoSContext->_devicePriorities.begin(), + _autoSContext->_devicePriorities.end(), + [](const DeviceInformation& d) -> bool { + return d.deviceName.find("CPU") != std::string::npos; + }); + // if have CPU Device, enable _loadContext[CPU] + if (CPUIter != _autoSContext->_devicePriorities.end()) { + _loadContext[CPU].isEnabled = true; + _loadContext[CPU].deviceInfo = *CPUIter; + _loadContext[CPU].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] = IE::PluginConfigParams::LATENCY; + _loadContext[CPU].workName = "CPU_HELP"; + LOG_INFO_TAG("will load CPU for accelerator"); + } else { + _loadContext[CPU].isEnabled = false; + } + } + // initialize the rest members of load context + for (int i = 0; i < CONTEXTNUM; i++) { + if (_loadContext[i].isEnabled) { + _loadContext[i].future = _loadContext[i].promise.get_future(); + auto* contextPtr = &_loadContext[i]; + auto modelPath = _autoSContext->_modelPath; + auto network = _autoSContext->_network; + _loadContext[i].task = std::bind(loadDeviceTask, contextPtr, modelPath, network, isCumulative); + } + } + } + std::vector otherDevicesloads; + std::vector cpuLoads; + if (_pCTPUTLoadContext) { + for (size_t i = 0; i < _autoSContext->_devicePriorities.size(); i++) { + auto* contextPtr = &_pCTPUTLoadContext[i]; + auto modelPath = _autoSContext->_modelPath; + auto network = _autoSContext->_network; + _pCTPUTLoadContext[i].task = std::bind(loadDeviceTask, contextPtr, modelPath, network, isCumulative); + if (i == _autoSContext->_devicePriorities.size() - 1 && + _pCTPUTLoadContext[i].deviceInfo.deviceName.find("CPU") != std::string::npos) { + cpuLoads.push_back(_pCTPUTLoadContext[i].task); + } else { + otherDevicesloads.push_back(_pCTPUTLoadContext[i].task); + } } } OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, @@ -350,13 +431,11 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) { _loadContext[CPU].future.wait(); // clean up helper infer requests // first, wait for all the remaining requests to finish - if (!_autoSContext->_runtimeFallback) { - for (auto& iter : _workerRequests["CPU_HELP"]) { - try { - iter._inferRequest._ptr->Wait(IE::InferRequest::WaitMode::RESULT_READY); - } catch (const IE::Exception& iie) { - LOG_DEBUG_TAG("No infer results expected, infer in CPU_HELP throw some errors: %s", iie.what()); - } + for (auto& iter : _workerRequests["CPU_HELP"]) { + try { + iter._inferRequest._ptr->Wait(IE::InferRequest::WaitMode::RESULT_READY); + } catch (const IE::Exception& iie) { + LOG_DEBUG_TAG("No infer results expected, infer in CPU_HELP throw some errors: %s", iie.what()); } } // late enough to check the idle queue now @@ -411,14 +490,38 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) { } _loadContext[ACTUALDEVICE].task(); } else { - // only one device need to load network, do not need to load it async - _loadContext[ACTUALDEVICE].task(); - _passthroughExeNet = _loadContext[ACTUALDEVICE].executableNetwork; + if (_pCTPUTLoadContext) { + for (auto&& device : _autoSContext->_devicePriorities) { + // initialize containers before run async task, if not initialized, it will hang during infer + _idleWorkerRequests[device.deviceName]; + _workerRequests[device.deviceName]; + _inferPipelineTasksDeviceSpecific[device.deviceName] = nullptr; + } + _executor = _autoSContext->_plugin->executorManager()->getIdleCPUStreamsExecutor(IStreamsExecutor::Config{ + "CTPUTDeviceAsyncLoad", + static_cast(std::thread::hardware_concurrency()) /* max possible #streams*/, + 0 /*default threads per stream, workaround for ticket 62376*/, + IStreamsExecutor::ThreadBindingType::NONE}); + // load devices other than CPU first + if (otherDevicesloads.size() > 0) { + // Wait for the devices other than CPU to load the network + _executor->runAndWait(otherDevicesloads); + } + // Finally load the CPU + if (cpuLoads.size() > 0) { + // Wait for CPU to load the network + _executor->runAndWait(cpuLoads); + } + } else { + // only one device need to load network, do not need to load it async + _loadContext[ACTUALDEVICE].task(); + _passthroughExeNet = _loadContext[ACTUALDEVICE].executableNetwork; + } } WaitFirstNetworkReady(); } -void AutoSchedule::TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network) { +void AutoSchedule::TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network, bool isCumulative) { auto& device = context.deviceInfo.deviceName; auto& deviceConfig = context.deviceInfo.config; auto& deviceList = context.metaDevices; @@ -458,7 +561,7 @@ void AutoSchedule::TryToLoadNetWork(AutoLoadContext& context, const std::string& context.errMessage += device + ":" + e.what(); context.isLoadSuccess = false; } - if (context.isLoadSuccess || curDevIsCPU) { + if (context.isLoadSuccess || curDevIsCPU || isCumulative) { return; } // need to reload network, unregister it's priority @@ -512,7 +615,7 @@ void AutoSchedule::TryToLoadNetWork(AutoLoadContext& context, const std::string& } LOG_DEBUG_TAG("try to load %s", context.deviceInfo.deviceName.c_str()); // try to load this candidate device - TryToLoadNetWork(context, modelPath, network); + TryToLoadNetWork(context, modelPath, network, isCumulative); } void AutoSchedule::WaitFirstNetworkReady() { @@ -542,6 +645,20 @@ void AutoSchedule::WaitFirstNetworkReady() { LOG_ERROR_TAG("load failed, %s", _loadContext[i].errMessage.c_str()); } } + // devices loaded successfully in CTPUT + if (_pCTPUTLoadContext) { + int nLoadSucNums = 0; + for (size_t i = 0; i < _autoSContext->_devicePriorities.size(); i++) { + // check if device loaded successfully + if (_pCTPUTLoadContext[i].isAlready) { + nLoadSucNums++; + } + } + // one or more devices loaded successfully + if (nLoadSucNums > 0) { + return; + } + } IE_THROW() << GetLogTag() << "load all devices failed"; } @@ -560,29 +677,45 @@ bool AutoSchedule::ScheduleToWorkerInferRequest(IE::Task inferPipelineTask, Devi std::vector devices; // AUTO work mode if (!preferred_device.empty()) { - // if the device needed by customer is not ready, need to wait for it - WaitActualNetworkReady(); - // the preferred_device should be the selected device in AUTO work mode - if (preferred_device != _loadContext[ACTUALDEVICE].deviceInfo.deviceName) { - IE_THROW(NotFound) << "The preferred device should be the selected device"; - } - devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo); - } else { - // _acceleratorDevice could be the same as _cpuDevice, such as AUTO:CPU - if (_loadContext[FALLBACKDEVICE].isAlready) { - devices.push_back(_loadContext[FALLBACKDEVICE].deviceInfo); + if (_pCTPUTLoadContext) { + std::lock_guard lock(_autoSContext->_fallbackMutex); + devices = _autoSContext->_devicePriorities; } else { - if (_loadContext[ACTUALDEVICE].isAlready) { - devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo); + // if the device needed by customer is not ready, need to wait for it + WaitActualNetworkReady(); + // the preferred_device should be the selected device in AUTO work mode + if (preferred_device != _loadContext[ACTUALDEVICE].deviceInfo.deviceName) { + IE_THROW(NotFound) << "The preferred device should be the selected device"; + } + devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo); + } + } else { + if (_pCTPUTLoadContext) { + // Devices that fail infer will be removed from the priority list in the callback, need lock here + std::lock_guard lock(_autoSContext->_fallbackMutex); + for (size_t i = 0; i < _autoSContext->_devicePriorities.size(); i++) { + devices.push_back(_autoSContext->_devicePriorities[i]); + } + } else { + // _acceleratorDevice could be the same as _cpuDevice, such as AUTO:CPU + if (_loadContext[FALLBACKDEVICE].isAlready) { + devices.push_back(_loadContext[FALLBACKDEVICE].deviceInfo); } else { - // replace deviceName with workName, so schedule can select correct - // idleWorkerQueue - auto deviceInfo = _loadContext[CPU].deviceInfo; - deviceInfo.deviceName = _loadContext[CPU].workName; - devices.push_back(std::move(deviceInfo)); + if (_loadContext[ACTUALDEVICE].isAlready) { + devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo); + } else { + // replace deviceName with workName, so schedule can select correct + // idleWorkerQueue + auto deviceInfo = _loadContext[CPU].deviceInfo; + deviceInfo.deviceName = _loadContext[CPU].workName; + devices.push_back(std::move(deviceInfo)); + } } } } + if (devices.size() == 0) { + IE_THROW(GeneralError) << "No device to run pipeline task"; + } for (auto&& device : devices) { if (!preferred_device.empty() && (device.deviceName != preferred_device)) { continue; @@ -644,27 +777,12 @@ IInferPtr AutoSchedule::CreateInferRequest() { if (!syncRequestImpl) syncRequestImpl = CreateInferRequestImpl(execNetwork->_networkInputs, execNetwork->_networkOutputs); syncRequestImpl->setPointerToExecutableNetworkInternal(execNetwork); - bool isCumulative = (_autoSContext->_performanceHint == IE::PluginConfigParams::CUMULATIVE_THROUGHPUT) ? true : false; - bool isCTPUTSingleDevice = - isCumulative && _loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("MULTI:") == std::string::npos ? true - : false; - if ((_passthroughExeNet && !isCumulative) || isCTPUTSingleDevice) { - std::string perfmode; - try { - perfmode = _passthroughExeNet->GetConfig( - CONFIG_KEY(PERFORMANCE_HINT)).as(); - } catch (const IE::Exception&) { - LOG_INFO("query perf hint from passthrough network failed"); - } - if (_autoSContext->_batchingDisabled || perfmode != CONFIG_VALUE(THROUGHPUT)) { - syncRequestImpl->setPointerToSo(_passthroughExeNet._so); - } else { - auto so = _passthroughExeNet._ptr->GetPointerToSo(); - // Get the _so from passthrough executable network when batch plugin is disable. - if (!so) - so = _passthroughExeNet._so; - syncRequestImpl->setPointerToSo(so); - } + if (_passthroughExeNet) { + auto so = _passthroughExeNet._ptr->GetPointerToSo(); + // Get the _so from passthrough executable network when batch plugin is disable. + if (!so) + so = _passthroughExeNet._so; + syncRequestImpl->setPointerToSo(so); } else if (std::static_pointer_cast(syncRequestImpl)->GetSharedRequest()) { // cumulative case, load to MULTI:* auto sharedMultiRequest = std::static_pointer_cast(syncRequestImpl)->GetSharedRequest(); diff --git a/src/plugins/auto/auto_schedule.hpp b/src/plugins/auto/auto_schedule.hpp index 6e08a94005a..08be8e5a5e5 100644 --- a/src/plugins/auto/auto_schedule.hpp +++ b/src/plugins/auto/auto_schedule.hpp @@ -50,6 +50,7 @@ public: public: AutoLoadContext _loadContext[CONTEXTNUM]; + std::unique_ptr _pCTPUTLoadContext = nullptr; protected: void GenerateWorkers(const std::string& device, const SoExecNetwork& executableNetwork) override; @@ -60,7 +61,7 @@ protected: private: void WaitFirstNetworkReady(); - void TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network); + void TryToLoadNetWork(AutoLoadContext& context, const std::string& modelPath, const IE::CNNNetwork& network, bool isCumulative); bool selectOtherDevice(const std::string& currentDeviceName); IE::Task releaseActualdeviceTask; diff --git a/src/plugins/auto/multi_schedule.cpp b/src/plugins/auto/multi_schedule.cpp index 888c25095bd..b2dd7097587 100644 --- a/src/plugins/auto/multi_schedule.cpp +++ b/src/plugins/auto/multi_schedule.cpp @@ -307,26 +307,15 @@ IInferPtr MultiSchedule::CreateInferRequest() { syncRequestImpl = CreateInferRequestImpl(execNetwork->_networkInputs, execNetwork->_networkOutputs); syncRequestImpl->setPointerToExecutableNetworkInternal(execNetwork); if (_passthroughExeNet) { - std::string perfmode; - try { - perfmode = _passthroughExeNet->GetConfig( - CONFIG_KEY(PERFORMANCE_HINT)).as(); - } catch (const IE::Exception&) { - LOG_INFO("query perf hint from passthrough network failed"); - } - if (_multiSContext->_batchingDisabled || perfmode != CONFIG_VALUE(THROUGHPUT)) { - syncRequestImpl->setPointerToSo(_passthroughExeNet._so); - } else { - auto so = _passthroughExeNet._ptr->GetPointerToSo(); - // Get the _so from passthrough executable network when batch plugin is disable. - if (!so) - so = _passthroughExeNet._so; - syncRequestImpl->setPointerToSo(so); - } + auto so = _passthroughExeNet._ptr->GetPointerToSo(); + // Get the _so from passthrough executable network when batch plugin is disable. + if (!so) + so = _passthroughExeNet._so; + syncRequestImpl->setPointerToSo(so); } else if (_multiSContext->_bindBuffer) { auto sharedRequest = std::static_pointer_cast(syncRequestImpl)->GetSharedRequest(); if (sharedRequest._ptr->getPointerToSo()) - syncRequestImpl->setPointerToSo(sharedRequest._ptr->getPointerToSo()); + syncRequestImpl->setPointerToSo(sharedRequest._ptr->getPointerToSo()); else syncRequestImpl->setPointerToSo(sharedRequest._so); } diff --git a/src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp index 89b86ae7fe3..975b253178a 100644 --- a/src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp @@ -6,7 +6,7 @@ #include #include - +#include #include "shared_test_classes/base/layer_test_utils.hpp" #include "ngraph/function.hpp" #include "ngraph_functions/subgraph_builders.hpp" @@ -90,9 +90,11 @@ protected: } else { m_extList.push_back(ext); } - std::replace(test_name.begin(), test_name.end(), '/', '_'); - std::replace(test_name.begin(), test_name.end(), '\\', '_'); - cache_path = "LoadNetwork" + test_name + "_cache"; + auto hash = std::hash()(test_name); + std::stringstream ss; + ss << std::this_thread::get_id(); + cache_path = "LoadNetwork" + std::to_string(hash) + "_" + + ss.str() + "_" + GetTimestamp() + "_cache"; } void TearDown() override { APIBaseTest::TearDown(); diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp index f6c7912a0ff..97ade0c6998 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp @@ -510,14 +510,16 @@ void CompiledKernelsCacheTest::SetUp() { std::string ext = userConfig.second; std::string::size_type pos = 0; if ((pos = ext.find(",", pos)) != std::string::npos) { - m_extList.push_back(ext.substr(0, pos)); - m_extList.push_back(ext.substr(pos + 1)); -} else { - m_extList.push_back(ext); -} - std::replace(test_name.begin(), test_name.end(), '/', '_'); - std::replace(test_name.begin(), test_name.end(), '\\', '_'); - cache_path = "compiledModel" + test_name + "_cache"; + m_extList.push_back(ext.substr(0, pos)); + m_extList.push_back(ext.substr(pos + 1)); + } else { + m_extList.push_back(ext); + } + auto hash = std::hash()(test_name); + std::stringstream ss; + ss << std::this_thread::get_id(); + cache_path = "compiledModel" + std::to_string(hash) + "_" + + ss.str() + "_" + GetTimestamp() + "_cache"; } void CompiledKernelsCacheTest::TearDown() { diff --git a/src/tests/unit/auto/auto_ctput_test.cpp b/src/tests/unit/auto/auto_ctput_test.cpp index 7585fb3b25b..28c990ffdc3 100644 --- a/src/tests/unit/auto/auto_ctput_test.cpp +++ b/src/tests/unit/auto/auto_ctput_test.cpp @@ -200,12 +200,6 @@ TEST_P(LoadNetworkWithCTPUTMockTest, CTPUTSingleDevLogicTest) { ::testing::Matcher&>( ComparePerfHint(InferenceEngine::PluginConfigParams::THROUGHPUT)))) .Times(1); - // no MULTI logic to be called - EXPECT_CALL(*core, - LoadNetwork(::testing::Matcher(_), - ::testing::Matcher("MULTI:" + targetDevice), - ::testing::Matcher&>(_))) - .Times(0); // if target device only has GPU, no CPU helper to be called if (targetDevice.find("GPU") != std::string::npos) { EXPECT_CALL(*core, @@ -220,14 +214,14 @@ TEST_P(LoadNetworkWithCTPUTMockTest, CTPUTSingleDevLogicTest) { for (auto& deviceName : targetDevices) { targetDev += deviceName; targetDev += ((deviceName == targetDevices.back()) ? "" : ","); + EXPECT_CALL(*core, + LoadNetwork(::testing::Matcher(_), + ::testing::Matcher(deviceName), + ::testing::Matcher&>( + ComparePerfHint(InferenceEngine::PluginConfigParams::THROUGHPUT)))) + .Times(1); } config.insert({InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, targetDev}); - // Call MULTI logic - EXPECT_CALL(*core, - LoadNetwork(::testing::Matcher(_), - ::testing::Matcher("MULTI:" + targetDev), - ::testing::Matcher&>(_))) - .Times(1); // no CPU helper to be called EXPECT_CALL(*core, LoadNetwork(::testing::Matcher(_), diff --git a/src/tests/unit/auto/auto_runtime_fallback_test.cpp b/src/tests/unit/auto/auto_runtime_fallback_test.cpp index d177ea48c2d..9b4e76a3b61 100644 --- a/src/tests/unit/auto/auto_runtime_fallback_test.cpp +++ b/src/tests/unit/auto/auto_runtime_fallback_test.cpp @@ -230,6 +230,8 @@ public: } }; +using AutoCTPUTRuntimeFallback = AutoRuntimeFallback; + TEST_P(AutoRuntimeFallback, releaseResource) { std::string targetDev; std::vector> targetDevices; @@ -362,3 +364,93 @@ const std::vector testConfigs = { INSTANTIATE_TEST_SUITE_P(smoke_AutoRuntimeFallback, AutoRuntimeFallback, ::testing::ValuesIn(testConfigs), AutoRuntimeFallback::getTestCaseName); + +TEST_P(AutoCTPUTRuntimeFallback, ctputDeviceInferFailTest) { + std::string targetDev; + std::vector> targetDevices; //std::tuple + int loadNetworkNum; + bool enableRumtimeFallback; + bool expectThrow; + bool loadNetworkFail; + bool generateWorkersFail; + std::tie(targetDevices, loadNetworkNum, enableRumtimeFallback, expectThrow, loadNetworkFail, generateWorkersFail) = this->GetParam(); + if (loadNetworkFail) { + ON_CALL(*core, LoadNetwork(::testing::Matcher(_), + ::testing::Matcher(StrEq("GPU.1")), + ::testing::Matcher(_))).WillByDefault(Throw(InferenceEngine::GeneralError{""})); + } + for (auto& deviceInfo : targetDevices) { + std::string deviceName; + bool ifThrow; + std::tie(deviceName, ifThrow) = deviceInfo; + targetDev += deviceName; + targetDev += ((deviceInfo == targetDevices.back()) ? "" : ","); + if (deviceName == "CPU") { + mockInferrequest = std::make_shared( + inferReqInternal, mockExecutor, nullptr, ifThrow); + ON_CALL(*mockIExeNet.get(), CreateInferRequest()).WillByDefault(Return(mockInferrequest)); + } else if (deviceName == "GPU.0") { + mockInferrequestGPU_0 = std::make_shared( + inferReqInternalGPU_0, mockExecutorGPU_0, nullptr, ifThrow); + ON_CALL(*mockIExeNetGPU_0.get(), CreateInferRequest()).WillByDefault(InvokeWithoutArgs([this]() { + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + return mockInferrequestGPU_0; })); + } else if (deviceName == "GPU.1") { + if (generateWorkersFail) { + mockInferrequestGPU_1 = + std::make_shared(inferReqInternalGPU_1, mockExecutorGPU_1, nullptr, ifThrow); + ON_CALL(*mockIExeNetGPU_1.get(), CreateInferRequest()) + .WillByDefault(Throw(InferenceEngine::GeneralError{""})); + } else { + mockInferrequestGPU_1 = + std::make_shared(inferReqInternalGPU_1, mockExecutorGPU_1, nullptr, ifThrow); + ON_CALL(*mockIExeNetGPU_1.get(), CreateInferRequest()).WillByDefault(InvokeWithoutArgs([this]() { + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + return mockInferrequestGPU_1; + })); + } + } else { + return; + } + } + plugin->SetName("AUTO"); + config.insert({InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, targetDev}); + config.insert({InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, + InferenceEngine::PluginConfigParams::CUMULATIVE_THROUGHPUT}); + if (!enableRumtimeFallback) { + config.insert({{"ENABLE_RUNTIME_FALLBACK", "NO"}}); + } + + EXPECT_CALL(*core, + LoadNetwork(::testing::Matcher(_), + ::testing::Matcher(_), + ::testing::Matcher&>(_))) + .Times(loadNetworkNum); + + std::shared_ptr exeNetwork; + std::shared_ptr infer_request; + + ASSERT_NO_THROW(exeNetwork = plugin->LoadExeNetworkImpl(cnnNet, config)); + ASSERT_NO_THROW(infer_request = exeNetwork->CreateInferRequest()); + if (expectThrow) { + EXPECT_THROW(infer_request->Infer(), IE::Exception); + } else { + ASSERT_NO_THROW(infer_request->Infer()); + } +} + +// ConfigParams: targetDevices(deviceName, will infer throw exception), loadNetworkNum, enableRumtimeFallback, +// expectThrow, loadNetworkFail, generateWorkersFail +const std::vector testCtputConfigs = { + ConfigParams{{{"CPU", false}, {"GPU.0", true}, {"GPU.1", true}}, 3, true, false, false, false}, + ConfigParams{{{"CPU", true}, {"GPU.0", false}, {"GPU.1", true}}, 3, true, false, false, false}, + ConfigParams{{{"CPU", true}, {"GPU.0", true}, {"GPU.1", true}}, 3, true, true, false, false}, + // disable RumtimeFallback + ConfigParams{{{"CPU", false}, {"GPU.0", false}, {"GPU.1", false}}, 3, false, false, false, false}, + ConfigParams{{{"CPU", true}, {"GPU.0", false}, {"GPU.1", false}}, 3, false, true, false, false}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_AutoCTPUTRuntimeFallback, + AutoCTPUTRuntimeFallback, + ::testing::ValuesIn(testCtputConfigs), + AutoCTPUTRuntimeFallback::getTestCaseName);