continue to load candidate devices when a device fails to load (#8006)

* fix potential risk

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* try/catch exceptions when loading the network

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* wait for the first ready device instead of CPU

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* continue to select the next device if loading failed

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* refactor code to make it simpler and more readable

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* debug core dump

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* improve the code

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* add test code

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* test draft

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* improve test code

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* remove debug print

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* address some review comments

add comments to the tests
initialize flags in the hpp
remove draft code

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* fix code format

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* try to fix the static library compile issue in CI

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* fix compile issue on CentOS 7.6

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* fix build issue on Windows

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* address a review comment

merge the loops

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* rebase onto master

fix conflict with PR 8389

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* fix some review issues

fix a locking issue
change NULL to nullptr
fix misprints
add an EXPECT_CALL for each ON_CALL

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* run the tests in CI

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* add more EXPECT_CALL tests

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* fix the comment on the test config

Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>

* Fix merge from master (was required to resolve conflicts)

Co-authored-by: Maxim Shevtsov <maxim.y.shevtsov@intel.com>
Commit 063df47ec9 (parent 543ca4f318), authored by Yuan Hu on 2021-11-20 22:49:19 +08:00, committed by GitHub.
12 changed files with 575 additions and 112 deletions
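
The core idea of the PR, condensed into a standalone C++ sketch for orientation before the diff: if loading on the selected device throws, drop that device from the candidate list and select again. The helper names here (selectBest, loadOn, loadWithFallback) are illustrative only, not the plugin's API; the real logic lives in MultiDeviceExecutableNetwork::TryToLoadNetWork below.

#include <algorithm>
#include <cstdio>
#include <stdexcept>
#include <string>
#include <vector>

// Illustrative stand-ins for the plugin's selection and loading steps.
struct Device { std::string name; bool loadable; };

static Device selectBest(const std::vector<Device>& list) {
    // the real plugin uses SelectDevice() with precision/priority heuristics
    if (list.empty()) throw std::runtime_error("no candidate device left");
    return list.front();
}

static void loadOn(const Device& d) {
    if (!d.loadable) throw std::runtime_error(d.name + ": load failed");
}

// Keep trying candidate devices until one loads, dropping each failed one.
static bool loadWithFallback(std::vector<Device> candidates) {
    while (!candidates.empty()) {
        Device current;
        try {
            current = selectBest(candidates);
        } catch (const std::exception&) {
            return false;  // selection itself failed: nothing usable remains
        }
        try {
            loadOn(current);
            return true;  // the first successful load wins
        } catch (const std::exception& e) {
            std::printf("warning: %s, trying next device\n", e.what());
        }
        candidates.erase(std::remove_if(candidates.begin(), candidates.end(),
                             [&](const Device& d) { return d.name == current.name; }),
                         candidates.end());
    }
    return false;
}

int main() {
    // GPU fails, MYRIAD succeeds: the "continue to next candidate" path.
    std::printf("loaded: %s\n",
                loadWithFallback({{"GPU", false}, {"MYRIAD", true}}) ? "yes" : "no");
}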


@@ -276,6 +276,10 @@ jobs:
displayName: 'VPU UT'
continueOnError: false
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ieMultiPluginUnitTests --gtest_output=xml:TEST-ieMultiPluginUnitTests.xml
displayName: 'MULTI UT'
continueOnError: false
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/onnxImporterUnitTests --gtest_output=xml:TEST-onnxImporterUnitTests.xml
displayName: 'ONNX Importer UT'
continueOnError: false


@@ -153,6 +153,10 @@ jobs:
displayName: 'ONNX Importer UT'
continueOnError: false
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ieMultiPluginUnitTests --gtest_output=xml:TEST-ieMultiPluginUnitTests.xml
displayName: 'MULTI UT'
continueOnError: false
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ieFuncTests --gtest_output=xml:TEST-ieFuncTests.xml
displayName: 'IE FuncTests'
continueOnError: false


@@ -207,6 +207,10 @@ jobs:
displayName: 'ONNX Importer UT'
continueOnError: false
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieMultiPluginUnitTests --gtest_output=xml:TEST-ieMultiPluginUnitTests.xml
displayName: 'MULTI UT'
continueOnError: false
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieFuncTests --gtest_output=xml:TEST-ieFuncTests.xml
displayName: 'IE FuncTests'
continueOnError: false


@@ -15,6 +15,13 @@
#include "multi_device_infer_request.hpp"
#include "multi_device_exec_network.hpp"
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif
namespace MultiDevicePlugin {
class MultiDeviceAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {


@@ -159,109 +159,178 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
_core = _multiPlugin->GetCore(); // shared_ptr that holds the Core
_config[MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = strDevices;
std::vector<DeviceInformation> needLoadDevices;
std::string profilingTask = "MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork:AutoMode";
// check if there is a CPU device
const auto CPUIter = std::find_if(metaDevices.begin(), metaDevices.end(),
[=](const DeviceInformation& d)->bool{return d.deviceName.find("CPU") != std::string::npos;});
if (CPUIter != metaDevices.end()) {
_cpuDevice = *CPUIter;
_config.insert(_cpuDevice.config.begin(), _cpuDevice.config.end());
needLoadDevices.push_back(_cpuDevice);
_cpuFuture = _cpuPromise.get_future();
profilingTask += _cpuDevice.deviceName;
// loadContext[ACTUALDEVICE] is always enabled;
// when there is a CPU device and at least one other device, loadContext[CPU] is enabled
_loadContext[ACTUALDEVICE].isEnabled = true;
_loadContext[ACTUALDEVICE].networkPrecision = GetNetworkPrecision(network);
_loadContext[ACTUALDEVICE].metaDevices = metaDevices;
_loadContext[ACTUALDEVICE].deviceInfo = _multiPlugin->SelectDevice(metaDevices, _loadContext[ACTUALDEVICE].networkPrecision);
bool isActualDevCPU =
_loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("CPU") != std::string::npos;
// if the actual device is CPU, disable _loadContext[CPU] and only use _loadContext[ACTUALDEVICE]
if (isActualDevCPU) {
_loadContext[CPU].isEnabled = false;
} else {
const auto CPUIter = std::find_if(metaDevices.begin(), metaDevices.end(),
[=](const DeviceInformation& d)->bool{return d.deviceName.find("CPU") != std::string::npos;});
// if there is a CPU device, enable _loadContext[CPU]
if (CPUIter != metaDevices.end()) {
_loadContext[CPU].isEnabled = true;
_loadContext[CPU].deviceInfo = *CPUIter;
} else {
_loadContext[CPU].isEnabled = false;
}
}
// get accelerator device, like GPU
auto networkPrecision = GetNetworkPrecision(network);
_acceleratorDevice = _multiPlugin->SelectDevice(metaDevices, networkPrecision);
bool isAccelerator =
_acceleratorDevice.deviceName.find("CPU") == std::string::npos;
if (isAccelerator) {
_config.insert(_acceleratorDevice.config.begin(), _acceleratorDevice.config.end());
needLoadDevices.push_back(_acceleratorDevice);
_acceleratorFuture = _acceleratorPromise.get_future();
profilingTask += _acceleratorDevice.deviceName;
// initialize the remaining members of the load contexts
for (int i = 0; i < CONTEXTNUM; i++) {
if (_loadContext[i].isEnabled) {
_loadContext[i].future = _loadContext[i].promise.get_future();
auto* contextPtr = &_loadContext[i];
_loadContext[i].task = [this, contextPtr, modelPath, network]() mutable {
TryToLoadNetWork(*contextPtr, modelPath, network);
if (contextPtr->isLoadSuccess) {
GenerateWorkers(contextPtr->deviceInfo.deviceName, contextPtr->executableNetwork);
// needs a lock
{
std::lock_guard<std::mutex> lock(_confMutex);
_config.insert(contextPtr->deviceInfo.config.begin(),
contextPtr->deviceInfo.config.end());
}
contextPtr->isAlready = true;
}
contextPtr->promise.set_value();
// the first network loading process has finished
std::call_once(_firstLoadOC, [this] () {
_firstLoadPromise.set_value();
});
};
}
}
OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, openvino::itt::handle(profilingTask));
if (needLoadDevices.size() == 0) {
IE_THROW() << "No device set";
}
// we will not wait for the accelerator network to finish loading,
// so the executor must not be destroyed before the task finishes;
// therefore keep the executor as a member of MultiDeviceExecutableNetwork.
_executor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(
IStreamsExecutor::Config{"AutoDeviceAsyncLoad",
static_cast<int>(std::thread::hardware_concurrency()) /* max possible #streams*/,
0 /*default threads per stream, workaround for ticket 62376*/,
IStreamsExecutor::ThreadBindingType::NONE});
for (auto& p : needLoadDevices) {
// initialize these containers first, to avoid insert operations from the threads
_idleWorkerRequests[p.deviceName];
_workerRequests[p.deviceName];
_inferPipelineTasksDeviceSpecific[p.deviceName] = NULL;
const auto device = p.deviceName;
const auto deviceConfig = p.config;
if (_loadContext[CPU].isEnabled) {
_firstLoadFuture = _firstLoadPromise.get_future();
// we will not wait for the accelerator network to finish loading,
// so some parameters need to be captured by value.
_executor->run([&, modelPath, network, device, deviceConfig]() {
SoExecutableNetworkInternal executableNetwork;
if (!modelPath.empty()) {
executableNetwork = _core->LoadNetwork(modelPath, device, deviceConfig);
} else {
executableNetwork = _core->LoadNetwork(network, device, deviceConfig);
}
GenerateWorkers(device, executableNetwork);
if (device.find("CPU") == std::string::npos) {
_alreadyActualNetwork = true;
_acceleratorPromise.set_value(executableNetwork);
} else {
_cpuPromise.set_value(executableNetwork);
}
});
// the executor must not be destroyed before the task finishes;
// therefore keep the executor as a member of MultiDeviceExecutableNetwork.
_executor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(
IStreamsExecutor::Config{"AutoDeviceAsyncLoad",
static_cast<int>(std::thread::hardware_concurrency()) /* max possible #streams*/,
0 /*default threads per stream, workaround for ticket 62376*/,
IStreamsExecutor::ThreadBindingType::NONE});
for (auto&& device : metaDevices) {
// initialize containers before running the async tasks
_idleWorkerRequests[device.deviceName];
_workerRequests[device.deviceName];
_inferPipelineTasksDeviceSpecific[device.deviceName] = nullptr;
}
_executor->run(_loadContext[CPU].task);
_executor->run(_loadContext[ACTUALDEVICE].task);
} else {
// only one device needs to load the network, no need to load it asynchronously
_loadContext[ACTUALDEVICE].task();
}
WaitFirstNetworkReady();
}
void MultiDeviceExecutableNetwork::TryToLoadNetWork(AutoLoadContext& context,
const std::string& modelPath,
const InferenceEngine::CNNNetwork& network) {
auto& device = context.deviceInfo.deviceName;
auto& deviceConfig = context.deviceInfo.config;
auto& deviceList = context.metaDevices;
bool curDevIsCPU = (device.find("CPU") != std::string::npos);
try {
if (!modelPath.empty()) {
context.executableNetwork = _core->LoadNetwork(modelPath, device, deviceConfig);
} else {
context.executableNetwork = _core->LoadNetwork(network, device, deviceConfig);
}
context.isLoadSuccess = true;
} catch (const std::exception& e) {
context.errMessage += device + ":" + e.what();
context.isLoadSuccess = false;
}
void MultiDeviceExecutableNetwork::WaitFirstNetworkReady() {
if (_alreadyActualNetwork) {
if (context.isLoadSuccess || curDevIsCPU) {
return;
}
if (_cpuFuture.valid() && _acceleratorFuture.valid()) {
try {
_networkFirstReady = _cpuFuture.get();
} catch (const std::exception& e) {
printf("Warning: load network to CPU failed: %s\n", e.what());
_networkActualNeeded = _acceleratorFuture.get();
}
} else if (_acceleratorFuture.valid()) { // only accelerator is valid, like AUTO:GPU
_networkActualNeeded = _acceleratorFuture.get();
} else if (_cpuFuture.valid()) { // only CPU is valid, like AUTO:CPU
_networkActualNeeded = _cpuFuture.get();
} else {
IE_THROW() << "No device task available";
// remove the current device from deviceList
auto eraseDevice = std::find_if(deviceList.begin(), deviceList.end(),
[device](DeviceInformation& d){
return d.deviceName == device;
});
deviceList.erase(eraseDevice);
if (deviceList.empty()) {
return;
}
// if there is only one device, or loading on the CPU device failed,
// the actual network is already ready now.
if (!_acceleratorFuture.valid()) {
_alreadyActualNetwork = true;
// select next candidate device
try {
context.deviceInfo = _multiPlugin->SelectDevice(deviceList, context.networkPrecision);
}
catch (const std::exception& e) {
return;
}
// if the selected device is CPU, no need to load on CPU again; _loadContext[CPU] must already have loaded it
curDevIsCPU = (context.deviceInfo.deviceName.find("CPU") != std::string::npos);
if (curDevIsCPU) {
return;
}
// try to load this candidate device
TryToLoadNetWork(context, modelPath, network);
}
void MultiDeviceExecutableNetwork::WaitFirstNetworkReady() {
if (_firstLoadFuture.valid()) {
// wait for the first loading to finish
_firstLoadFuture.wait();
}
// check whether any device has loaded the network successfully
for (int i = CONTEXTNUM - 1; i >= 0; i--) {
if (_loadContext[i].isEnabled && _loadContext[i].isAlready) {
return;
}
}
// the first loading failed, wait for another one
for (int i = CONTEXTNUM - 1; i >= 0; i--) {
if (_loadContext[i].isEnabled) {
_loadContext[i].future.wait();
// check if loading is successful
if (_loadContext[i].isAlready) {
return;
}
}
}
// TODO: print the error messages of the failed loads
IE_THROW() << "[AUTO] load all devices failed";
}
void MultiDeviceExecutableNetwork::WaitActualNetworkReady() const {
OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, "MultiDeviceExecutableNetwork::WaitActualNetworkReady");
// different APIs may call this function, so use call_once here,
// once per MultiDeviceExecutableNetwork instance
OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, "MultiDeviceExecutableNetwork::WaitActualNetworkReady");
std::call_once(_oc, [&] () {
if (_acceleratorFuture.valid()) {
_networkActualNeeded = _acceleratorFuture.get();
}
std::call_once(_oc, [this] () {
if (_loadContext[ACTUALDEVICE].future.valid()) {
_loadContext[ACTUALDEVICE].future.get();
}
// if _loadContext[ACTUALDEVICE] load failed, fall back to _loadContext[CPU]
if (!_loadContext[ACTUALDEVICE].isAlready) {
_loadContext[ACTUALDEVICE].executableNetwork = _loadContext[CPU].executableNetwork;
_loadContext[ACTUALDEVICE].deviceInfo = _loadContext[CPU].deviceInfo;
_loadContext[ACTUALDEVICE].isAlready = true;
}
});
}
@@ -270,19 +339,18 @@ void MultiDeviceExecutableNetwork::ScheduleToWorkerInferRequest(Task inferPipeli
// AUTO work mode
if (_workModeIsAUTO) {
if (!preferred_device.empty()) {
// the preferred_device should be the selected device in AUTO work mode
if (preferred_device != _acceleratorDevice.deviceName) {
IE_THROW(NotFound) << "The preferred_device should be the selected device";
}
// if the device the customer needs is not ready, wait for it
WaitActualNetworkReady();
devices.push_back(_acceleratorDevice);
// the preferred_device should be the selected device in AUTO work mode
if (preferred_device != _loadContext[ACTUALDEVICE].deviceInfo.deviceName) {
IE_THROW(NotFound) << "The preferred_device should be the selected device";
}
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
} else {
// _acceleratorDevice could be the same as _cpuDevice, such as AUTO:CPU
if (_alreadyActualNetwork) {
devices.push_back(_acceleratorDevice);
if (_loadContext[ACTUALDEVICE].isAlready) {
devices.push_back(_loadContext[ACTUALDEVICE].deviceInfo);
} else {
devices.push_back(_cpuDevice);
devices.push_back(_loadContext[CPU].deviceInfo);
}
}
} else {
@@ -329,7 +397,8 @@ void MultiDeviceExecutableNetwork::run(Task inferPipelineTask) {
MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
// this is necessary to guarantee members are destroyed only after getting the future
if (_workModeIsAUTO) {
if (_workModeIsAUTO && _loadContext[CPU].isEnabled) {
_loadContext[CPU].future.get();
WaitActualNetworkReady();
// it is necessary to wait for the network loading threads to finish here.
InferenceEngine::ExecutorManager::getInstance()->clear("AutoDeviceAsyncLoad");
@@ -352,7 +421,7 @@ MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
std::shared_ptr<InferenceEngine::RemoteContext> MultiDeviceExecutableNetwork::GetContext() const {
if (_workModeIsAUTO) {
WaitActualNetworkReady();
return _networkActualNeeded->GetContext();
return _loadContext[ACTUALDEVICE].executableNetwork->GetContext();
}
auto devices = [&] {
std::lock_guard<std::mutex> lock(_mutex);
@@ -383,8 +452,8 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create
InferenceEngine::SoIInferRequestInternal request_to_share_blobs_with;
if (_workModeIsAUTO) {
if (!_networkFirstReady && _networkActualNeeded) {
auto& dev_requests = _workerRequests[_acceleratorDevice.deviceName];
if (!_loadContext[CPU].isEnabled && _loadContext[ACTUALDEVICE].isAlready) {
auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
if (num < dev_requests.size()) {
request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
}
@@ -413,8 +482,8 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create
InferenceEngine::SoIInferRequestInternal request_to_share_blobs_with;
if (_workModeIsAUTO) {
if (!_networkFirstReady && _networkActualNeeded) {
auto& dev_requests = _workerRequests[_acceleratorDevice.deviceName];
if (!_loadContext[CPU].isEnabled && _loadContext[ACTUALDEVICE].isAlready) {
auto& dev_requests = _workerRequests[_loadContext[ACTUALDEVICE].deviceInfo.deviceName];
if (num < dev_requests.size()) {
request_to_share_blobs_with = dev_requests.at(num)._inferRequest;
}
@@ -482,16 +551,21 @@ void MultiDeviceExecutableNetwork::SetConfig(const std::map<std::string, Inferen
_devicePriorities = metaDevices;
// update value in config
_confMutex.lock();
_config[MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = priorities->second;
_confMutex.unlock();
}
}
}
InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetConfig(const std::string &name) const {
_confMutex.lock();
auto it = _config.find(name);
if (it != _config.end()) {
_confMutex.unlock();
return it->second;
} else {
_confMutex.unlock();
// find the config key among the networks' config keys
for (const auto& desc : _networksPerDevice) {
const auto& execNetwork = desc.second;
@@ -509,11 +583,10 @@ InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetConfig(const std::st
InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetMetric(const std::string &name) const {
if (_workModeIsAUTO) {
// FIXME: should we wait for the actual device? meanwhile it would block inference; how to fix?
if (_alreadyActualNetwork) {
WaitActualNetworkReady();
return _networkActualNeeded->GetMetric(name);
if (_loadContext[ACTUALDEVICE].isAlready) {
return _loadContext[ACTUALDEVICE].executableNetwork->GetMetric(name);
}
return _networkFirstReady->GetMetric(name);
return _loadContext[CPU].executableNetwork->GetMetric(name);
}
if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
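
The WaitFirstNetworkReady logic above hinges on a promise that is set exactly once by whichever load task finishes first, guarded by std::call_once. A minimal standalone sketch of that signalling, with the plugin types stripped out (the names here are illustrative):

#include <cstdio>
#include <future>
#include <mutex>
#include <thread>

// Both loading threads report completion, but only the first call
// releases the waiter, like _firstLoadOC / _firstLoadPromise above.
std::once_flag firstLoadOnce;
std::promise<void> firstLoadPromise;

void notifyFirstLoadDone() {
    std::call_once(firstLoadOnce, [] { firstLoadPromise.set_value(); });
}

int main() {
    std::future<void> firstLoadFuture = firstLoadPromise.get_future();
    std::thread cpu([] { notifyFirstLoadDone(); });    // usually the fast CPU load
    std::thread accel([] { notifyFirstLoadDone(); });  // the slower accelerator load
    firstLoadFuture.wait();  // returns as soon as either device is ready
    std::puts("first network ready");
    cpu.join();
    accel.join();
}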


@@ -23,6 +23,12 @@
# include <tbb/concurrent_queue.h>
#endif
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif
namespace MultiDevicePlugin {
@@ -39,6 +45,26 @@ struct DeviceInformation {
std::string defaultDeviceID;
};
struct AutoLoadContext {
std::atomic<bool> isEnabled = {false};
std::atomic<bool> isAlready = {false};
std::atomic<bool> isLoadSuccess = {false};
std::future<void> future;
std::promise<void> promise;
InferenceEngine::SoExecutableNetworkInternal executableNetwork;
DeviceInformation deviceInfo;
std::vector<DeviceInformation> metaDevices;
std::string networkPrecision;
std::string errMessage;
InferenceEngine::Task task;
};
enum AutoLoadContextIndex {
CPU = 0,
ACTUALDEVICE = 1,
CONTEXTNUM = 2
};
template<typename T>
using DeviceMap = std::unordered_map<DeviceName, T>;
@@ -163,22 +189,21 @@ private:
static bool RunPipelineTask(InferenceEngine::Task& inferPipelineTask,
NotBusyWorkerRequests& idleWorkerRequests,
const DeviceName& preferred_device);
void TryToLoadNetWork(AutoLoadContext& context,
const std::string& modelPath,
const InferenceEngine::CNNNetwork& network);
private:
std::shared_ptr<InferenceEngine::ICore> _core;
InferenceEngine::IStreamsExecutor::Ptr _executor;
MultiDeviceInferencePlugin* _multiPlugin;
InferenceEngine::SoExecutableNetworkInternal _networkFirstReady;
mutable InferenceEngine::SoExecutableNetworkInternal _networkActualNeeded;
NetworkFuture _cpuFuture;
NetworkPromise _cpuPromise;
mutable NetworkFuture _acceleratorFuture;
mutable NetworkPromise _acceleratorPromise;
mutable std::atomic<bool> _alreadyActualNetwork = {false};
bool _workModeIsAUTO = {false};
DeviceInformation _cpuDevice;
DeviceInformation _acceleratorDevice;
mutable std::once_flag _oc;
std::once_flag _firstLoadOC;
std::future<void> _firstLoadFuture;
std::promise<void> _firstLoadPromise;
mutable AutoLoadContext _loadContext[CONTEXTNUM];
mutable std::mutex _confMutex;
};
} // namespace MultiDevicePlugin


@@ -16,6 +16,13 @@
#include <string>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif
namespace MultiDevicePlugin {
class MultiDeviceInferRequest : public InferenceEngine::IInferRequestInternal {


@@ -13,6 +13,13 @@
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include "multi_device_exec_network.hpp"
#ifdef MULTIUNITTEST
#define MOCKTESTMACRO virtual
#define MultiDevicePlugin MockMultiDevicePlugin
#else
#define MOCKTESTMACRO
#endif
namespace MultiDevicePlugin {
class MultiDeviceInferencePlugin : public InferenceEngine::IInferencePlugin {
@@ -33,11 +40,11 @@ public:
InferenceEngine::Parameter GetMetric(const std::string& name,
const std::map<std::string, InferenceEngine::Parameter>& options) const override;
std::vector<MultiDevicePlugin::DeviceInformation> ParseMetaDevices(const std::string & devicesRequestsCfg,
MOCKTESTMACRO std::vector<MultiDevicePlugin::DeviceInformation> ParseMetaDevices(const std::string & devicesRequestsCfg,
const std::map<std::string, std::string> & config) const;
std::string GetDeviceList(const std::map<std::string, std::string>& config) const;
DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32));
MOCKTESTMACRO DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32));
protected:
std::map<std::string, std::string> GetSupportedConfig(const std::map<std::string, std::string>& config,


@@ -27,3 +27,5 @@ endif ()
if(NGRAPH_ONNX_FRONTEND_ENABLE)
add_subdirectory(frontends/onnx_import)
endif()
add_subdirectory(multi)


@@ -0,0 +1,36 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
set(TARGET_NAME ieMultiPluginUnitTests)
set(CI_BUILD_NUMBER "unittest")
addVersionDefines(${IE_MAIN_SOURCE_DIR}/src/multi_device/multi_device_plugin.cpp CI_BUILD_NUMBER)
add_definitions(-DMULTIUNITTEST)
addIeTargetTest(
NAME ${TARGET_NAME}
ROOT ${CMAKE_CURRENT_SOURCE_DIR}
ADDITIONAL_SOURCE_DIRS ${IE_MAIN_SOURCE_DIR}/src/multi_device
INCLUDES
${IE_MAIN_SOURCE_DIR}/src/multi_device
LINK_LIBRARIES
inference_engine_lp_transformations
ngraphFunctions
inference_engine
ngraph
inference_engine_transformations
unitTestUtils
ADD_CPPLINT
DEPENDENCIES
template_extension
mock_engine
ngraphFunctions
LABELS
Multi
Auto
)
set_ie_threading_interface_for(${TARGET_NAME})
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})


@@ -0,0 +1,273 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <ie_metric_helpers.hpp>
#include <common_test_utils/test_constants.hpp>
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp"
#include "unit_test_utils/mocks/mock_iinfer_request.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_ivariable_state_internal.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp"
#include <ie_core.hpp>
#include <multi-device/multi_device_config.hpp>
#include <ngraph_functions/subgraph_builders.hpp>
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include "plugin/mock_multi_device_plugin.hpp"
#include "cpp/ie_plugin.hpp"
using ::testing::MatcherCast;
using ::testing::AllOf;
using ::testing::Throw;
using ::testing::Matches;
using ::testing::_;
using ::testing::StrEq;
using ::testing::Return;
using ::testing::Property;
using ::testing::Eq;
using ::testing::ReturnRef;
using ::testing::AtLeast;
using ::testing::AnyNumber;
using Config = std::map<std::string, std::string>;
using namespace MockMultiDevice;
#define IE_SET_METRIC(key, name, ...) \
typename ::InferenceEngine::Metrics::MetricType<::InferenceEngine::Metrics::key>::type name = \
__VA_ARGS__;
using DeviceParams = std::tuple<std::string, bool>;
using ConfigParams = std::tuple<
bool, // whether the run can continue
bool, // whether SelectDevice throws an exception
std::vector<DeviceParams>, // {device, loadSuccess}
unsigned int, // select count
unsigned int, // load count
unsigned int // load device success count
>;
class AutoLoadFailedTest : public ::testing::TestWithParam<ConfigParams> {
public:
std::shared_ptr<ngraph::Function> function;
InferenceEngine::CNNNetwork cnnNet;
std::shared_ptr<MockICore> core;
std::shared_ptr<MockMultiDeviceInferencePlugin> plugin;
// mock exeNetwork
std::shared_ptr<MockIExecutableNetworkInternal> mockIExeNet;
ov::runtime::SoPtr<IExecutableNetworkInternal> mockExeNetwork;
MockIInferencePlugin* mockIPlugin;
InferenceEngine::InferencePlugin mockPlugin;
// config for Auto device
std::map<std::string, std::string> config;
std::vector<DeviceInformation> metaDevices;
std::shared_ptr<MockIInferRequestInternal> inferReqInternal;
public:
static std::string getTestCaseName(testing::TestParamInfo<ConfigParams> obj) {
unsigned int selectCount;
unsigned int loadCount;
unsigned int loadSuccessCount;
std::vector<std::tuple<std::string, bool>> deviceConfigs;
bool continueRun;
bool thrExcWheSelect;
std::tie(continueRun, thrExcWheSelect, deviceConfigs,
selectCount, loadCount, loadSuccessCount) = obj.param;
std::ostringstream result;
for (auto& item : deviceConfigs) {
if (std::get<1>(item)) {
result << std::get<0>(item) << "_success_";
} else {
result << std::get<0>(item) << "_failed_";
}
}
if (thrExcWheSelect) {
result << "select_failed_";
} else {
result << "select_success_";
}
result << "select_" << selectCount << "_loadCount_"
<< loadCount << "_loadSuccessCount_" << loadSuccessCount;
return result.str();
}
void TearDown() override {
core.reset();
plugin.reset();
mockIExeNet.reset();
mockExeNetwork = {};
mockPlugin = {};
config.clear();
metaDevices.clear();
inferReqInternal.reset();
}
void SetUp() override {
// prepare mockExeNetwork
mockIExeNet = std::make_shared<MockIExecutableNetworkInternal>();
auto mockIPluginPtr = std::make_shared<MockIInferencePlugin>();
ON_CALL(*mockIPluginPtr, LoadNetwork(MatcherCast<const CNNNetwork&>(_), _)).WillByDefault(Return(mockIExeNet));
mockPlugin = InferenceEngine::InferencePlugin{{}, mockIPluginPtr};
// remove the annoying ON_CALL message
EXPECT_CALL(*mockIPluginPtr, LoadNetwork(MatcherCast<const CNNNetwork&>(_), _)).Times(1);
mockExeNetwork = {{}, mockPlugin.LoadNetwork(CNNNetwork{}, {})};
// prepare mock ICore and cnnNetwork for loading
core = std::shared_ptr<MockICore>(new MockICore());
auto* origin_plugin = new MockMultiDeviceInferencePlugin();
plugin = std::shared_ptr<MockMultiDeviceInferencePlugin>(origin_plugin);
function = ngraph::builder::subgraph::makeConvPoolRelu();
cnnNet = InferenceEngine::CNNNetwork(function);
// replace the core with the mock ICore
plugin->SetCore(core);
// make the mock execNetwork functional
inferReqInternal = std::make_shared<MockIInferRequestInternal>();
ON_CALL(*mockIExeNet.get(), CreateInferRequest()).WillByDefault(Return(inferReqInternal));
IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, optimalNum, 2);
ON_CALL(*mockIExeNet.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
.WillByDefault(Return(optimalNum));
}
};
TEST_P(AutoLoadFailedTest, LoadCNNetWork) {
// get the parameters
unsigned int selectCount;
unsigned int loadCount;
unsigned int loadSuccessCount;
std::vector<std::tuple<std::string, bool>> deviceConfigs;
bool continueRun;
bool thrExcWheSelect;
std::tie(continueRun, thrExcWheSelect, deviceConfigs, selectCount,
loadCount, loadSuccessCount) = this->GetParam();
// test auto plugin
config.insert({CONFIG_KEY_INTERNAL(MULTI_WORK_MODE_AS_AUTO), InferenceEngine::PluginConfigParams::YES});
std::string devicesStr = "";
int selDevsSize = deviceConfigs.size();
for (auto iter = deviceConfigs.begin(); iter != deviceConfigs.end(); selDevsSize--) {
std::string deviceName = std::get<0>(*iter);
bool loadSuccess = std::get<1>(*iter);
// according to the device loading config, set whether loading will succeed or throw an exception.
if (loadSuccess) {
ON_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>(StrEq(deviceName)),
::testing::Matcher<const Config&>(_))).WillByDefault(Return(mockExeNetwork));
} else {
ON_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>(StrEq(deviceName)),
::testing::Matcher<const Config&>(_)))
.WillByDefault(Throw(InferenceEngine::GeneralError{""}));
}
DeviceInformation devInfo = {deviceName, {}, 2, ""};
metaDevices.push_back(std::move(devInfo));
// set the return value of SelectDevice;
// for example, if there are three devices, it will return GPU on the first call, then MYRIAD,
// and finally CPU
ON_CALL(*plugin, SelectDevice(Property(&std::vector<DeviceInformation>::size, Eq(selDevsSize)), _))
.WillByDefault(Return(metaDevices[deviceConfigs.size() - selDevsSize]));
devicesStr += deviceName;
devicesStr += ((++iter) == deviceConfigs.end()) ? "" : ",";
}
ON_CALL(*plugin, ParseMetaDevices(_, _)).WillByDefault(Return(metaDevices));
config.insert({InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , devicesStr});
// if this parameter is set to true, the second SelectDevice call will throw an exception;
// if there is only one device, it will throw at the first call
if (thrExcWheSelect) {
selDevsSize = deviceConfigs.size();
if (selDevsSize > 1) {
ON_CALL(*plugin, SelectDevice(Property(&std::vector<DeviceInformation>::size, Eq(selDevsSize - 1)), _))
.WillByDefault(Throw(InferenceEngine::GeneralError{""}));
} else {
ON_CALL(*plugin, SelectDevice(Property(&std::vector<DeviceInformation>::size, Eq(1)), _))
.WillByDefault(Throw(InferenceEngine::GeneralError{""}));
}
}
EXPECT_CALL(*plugin, ParseMetaDevices(_, _)).Times(AtLeast(1));
EXPECT_CALL(*plugin, SelectDevice(_, _)).Times(selectCount);
EXPECT_CALL(*core, LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
::testing::Matcher<const std::string&>(_),
::testing::Matcher<const Config&>(_))).Times(loadCount);
// on successful load, optimalNum requests are created per device; in this test it is 2
EXPECT_CALL(*mockIExeNet.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
.Times(loadSuccessCount);
EXPECT_CALL(*inferReqInternal, SetCallback(_)).Times(loadSuccessCount * 2);
EXPECT_CALL(*mockIExeNet.get(), CreateInferRequest()).Times(loadSuccessCount * 2);
if (continueRun) {
ASSERT_NO_THROW(plugin->LoadExeNetworkImpl(cnnNet, config));
} else {
ASSERT_THROW(plugin->LoadExeNetworkImpl(cnnNet, config), InferenceEngine::Exception);
}
}
// the test configuration, for example:
// ConfigParams {true, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, false},
//                             DeviceParams {CommonTestUtils::DEVICE_MYRIAD, true},
//                             DeviceParams {CommonTestUtils::DEVICE_CPU, true}}, 2, 3, 2},
//
// the elements of ConfigParams are
// {continueRun, selectThrowException, deviceLoadSuccessVector, selectCount, loadCount, loadSuccessCount}
// {true, false, 3 devices, 2, 3, 2}
//
// there are three devices to load;
// the CPU load succeeds, but GPU fails to load, so MYRIAD is selected next and loaded again.
// LoadExeNetworkImpl will not throw an exception and can continue to run:
// it selects twice (first GPU, then MYRIAD)
// and loads the network three times (CPU, GPU, MYRIAD).
// the inference request count is loadSuccessCount * optimalNum; in this test case optimalNum is 2,
// so the inference request count is 4 (CPU 2, MYRIAD 2)
//
const std::vector<ConfigParams> testConfigs = {ConfigParams {true, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, true},
DeviceParams {CommonTestUtils::DEVICE_MYRIAD, true},
DeviceParams {CommonTestUtils::DEVICE_CPU, true}}, 1, 2, 2},
ConfigParams {true, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, false},
DeviceParams {CommonTestUtils::DEVICE_MYRIAD, true},
DeviceParams {CommonTestUtils::DEVICE_CPU, true}}, 2, 3, 2},
ConfigParams {true, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, true},
DeviceParams {CommonTestUtils::DEVICE_MYRIAD, false},
DeviceParams {CommonTestUtils::DEVICE_CPU, true}}, 1, 2, 2},
ConfigParams {true, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, true},
DeviceParams {CommonTestUtils::DEVICE_MYRIAD, true},
DeviceParams {CommonTestUtils::DEVICE_CPU, false}}, 1, 2, 1},
ConfigParams {true, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, true},
DeviceParams {CommonTestUtils::DEVICE_MYRIAD, false},
DeviceParams {CommonTestUtils::DEVICE_CPU, false}}, 1, 2, 1},
ConfigParams {true, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, false},
DeviceParams {CommonTestUtils::DEVICE_MYRIAD, true},
DeviceParams {CommonTestUtils::DEVICE_CPU, false}}, 2, 3, 1},
ConfigParams {true, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, false},
DeviceParams {CommonTestUtils::DEVICE_MYRIAD, false},
DeviceParams {CommonTestUtils::DEVICE_CPU, true}}, 3, 3, 1},
ConfigParams {false, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, false},
DeviceParams {CommonTestUtils::DEVICE_MYRIAD, false},
DeviceParams {CommonTestUtils::DEVICE_CPU, false}}, 3, 3, 0},
ConfigParams {true, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, true},
DeviceParams {CommonTestUtils::DEVICE_CPU, true}}, 1, 2, 2},
ConfigParams {true, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, false},
DeviceParams {CommonTestUtils::DEVICE_CPU, true}}, 2, 2, 1},
ConfigParams {true, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, true},
DeviceParams {CommonTestUtils::DEVICE_CPU, false}}, 1, 2, 1},
ConfigParams {false, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, false},
DeviceParams {CommonTestUtils::DEVICE_CPU, false}}, 2, 2, 0},
ConfigParams {false, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, false}}, 1, 1, 0},
ConfigParams {false, false, {DeviceParams {CommonTestUtils::DEVICE_CPU, false}}, 1, 1, 0},
ConfigParams {true, false, {DeviceParams {CommonTestUtils::DEVICE_GPU, true}}, 1, 1, 1},
ConfigParams {true, false, {DeviceParams {CommonTestUtils::DEVICE_CPU, true}}, 1, 1, 1},
ConfigParams {false, true, {DeviceParams {CommonTestUtils::DEVICE_GPU, true}}, 1, 0, 0},
ConfigParams {false, true, {DeviceParams {CommonTestUtils::DEVICE_CPU, true}}, 1, 0, 0},
ConfigParams {true, true, {DeviceParams {CommonTestUtils::DEVICE_GPU, false},
DeviceParams {CommonTestUtils::DEVICE_MYRIAD, true},
DeviceParams {CommonTestUtils::DEVICE_CPU, true}}, 2, 2, 1},
ConfigParams {false, true, {DeviceParams {CommonTestUtils::DEVICE_GPU, false},
DeviceParams {CommonTestUtils::DEVICE_MYRIAD, true},
DeviceParams {CommonTestUtils::DEVICE_CPU, false}}, 2, 2, 0},
ConfigParams {true, true, {DeviceParams {CommonTestUtils::DEVICE_GPU, false},
DeviceParams {CommonTestUtils::DEVICE_CPU, true}}, 2, 2, 1}
};
INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, AutoLoadFailedTest,
::testing::ValuesIn(testConfigs),
AutoLoadFailedTest::getTestCaseName);
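
A pattern used throughout this test, and called out in the review fixes above, is pairing every ON_CALL with an EXPECT_CALL: ON_CALL supplies the default action, while EXPECT_CALL asserts the expected call count and suppresses gmock's "uninteresting mock function call" warnings. A condensed, hypothetical illustration of the pairing (Loader stands in for the mocked core interface):

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <string>

using ::testing::_;
using ::testing::Return;

// Hypothetical interface, standing in for MockICore::LoadNetwork.
struct Loader {
    virtual ~Loader() = default;
    virtual bool Load(const std::string& device) = 0;
};

struct MockLoader : Loader {
    MOCK_METHOD(bool, Load, (const std::string&), (override));
};

TEST(OnCallExpectCall, PairedSetup) {
    MockLoader loader;
    // ON_CALL supplies the default action for any device name...
    ON_CALL(loader, Load(_)).WillByDefault(Return(true));
    // ...and the paired EXPECT_CALL asserts the call count
    // (and suppresses the "uninteresting call" warning).
    EXPECT_CALL(loader, Load(_)).Times(2);
    EXPECT_TRUE(loader.Load("GPU"));
    EXPECT_TRUE(loader.Load("CPU"));
}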


@@ -0,0 +1,21 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <gmock/gmock.h>
#include "ie_icore.hpp"
#include "multi_device_plugin.hpp"
#include <iostream>
using namespace MockMultiDevicePlugin;
namespace MockMultiDevice {
class MockMultiDeviceInferencePlugin : public MultiDeviceInferencePlugin {
public:
MOCK_METHOD(DeviceInformation, SelectDevice, ((const std::vector<DeviceInformation>&),
const std::string&), (override));
MOCK_METHOD((std::vector<DeviceInformation>), ParseMetaDevices,
(const std::string&, (const std::map<std::string, std::string>&)), (const, override));
};
}// namespace MockMultiDevice