[MULTI]Data affinity remote context and blobs (#3342)
* zero-copy (assuming deterministic app-level scheduling) for the multi-device, via "borrowing" the corresponding device-specific blobs and letting the app implicitly use these * Optimized Infer Request Scheduling * remoteblob checks in the conventional SetBlob * correctly (with status) reporting NOT_IMPLEMENTED * SetBlob to accommodate the RemoteBlobs * Tests for remote blobs support via MULTI: creating the shared_test in case the other (closed source) plugins would want to use that (in the private shared_tests instantiations). Also instantiating the remote blobs tests for some basic combinations to test that the MULTI supports them * macOS compilation (and general plugin platform support) fix * shuffled files, so that the MULTI tests are now part of the ieFuncTests (and need no separate target). Also brushed the macro that handles the NOT_IMPLEMENTED a bit * further shuffled files, so that the initial MULTI tests are now part of the IE tests, yet specific instances do need separate targets * Fixed misprint * Brushing the code and comments a bit * further brushing of the ScheduleToWorkerRequest: moving the task execution directly into the loop over devices (avoids pointers and 'else' clause) * 1) zero-copy (assuming deterministic app-level scheduling) for the multi-device, via "borrowing" the corresponding device-specific blobs and letting the app implicitly use these 2) Initial MULTI section in the opt guide (primarily to document a tip on helping the MULTI to keep the zero-copy path) * [MULTI] remote context support and associated scheduling (respecting the remote data affinity) * fix CentOS (old) gcc issue: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81880 since the introduced thread_local string is a template, the bug manifests itself (and the string is not allocated/initialized). 
the workaround is to wrap the std::string into a function * further fix for the old gcc versions issue, now with a non-trivial thread_local destruction segfault: switching from the std::string to the plain const char* * additional tests for the MULTI and remote blobs (no remote context and multi GPUs cases) * fix for the tests (that now can check for the more specific NotImplemented exception). Also a couple of line endings
This commit is contained in:
parent
8b9feed603
commit
d0eef043fd
@ -14,6 +14,7 @@
|
||||
#include <string>
|
||||
|
||||
#include "cpp/ie_memory_state.hpp"
|
||||
#include "ie_remote_context.hpp"
|
||||
#include "ie_iinfer_request.hpp"
|
||||
#include "details/ie_exception_conversion.hpp"
|
||||
#include "details/ie_so_loader.h"
|
||||
@ -123,8 +124,9 @@ public:
|
||||
CALL_STATUS_FNC(GetBlob, name.c_str(), data);
|
||||
std::string error = "Internal error: blob with name `" + name + "` is not allocated!";
|
||||
auto blobPtr = data.get();
|
||||
const bool remoteBlobPassed = blobPtr->is<RemoteBlob>();
|
||||
if (blobPtr == nullptr) THROW_IE_EXCEPTION << error;
|
||||
if (blobPtr->buffer() == nullptr) THROW_IE_EXCEPTION << error;
|
||||
if (!remoteBlobPassed && blobPtr->buffer() == nullptr) THROW_IE_EXCEPTION << error;
|
||||
return data;
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,7 @@ MultiDeviceAsyncInferRequest::MultiDeviceAsyncInferRequest(
|
||||
_multiDeviceExecutableNetwork{multiDeviceExecutableNetwork},
|
||||
_inferRequest{inferRequest},
|
||||
_needPerfCounters{needPerfCounters} {
|
||||
// this executor starts the inference while the task (checking the result) is passed to the next stage
|
||||
struct ThisRequestExecutor : public ITaskExecutor {
|
||||
explicit ThisRequestExecutor(MultiDeviceAsyncInferRequest* _this_) : _this{_this_} {}
|
||||
void run(Task task) override {
|
||||
@ -32,22 +33,52 @@ MultiDeviceAsyncInferRequest::MultiDeviceAsyncInferRequest(
|
||||
MultiDeviceAsyncInferRequest* _this = nullptr;
|
||||
};
|
||||
_pipeline = {
|
||||
{_multiDeviceExecutableNetwork, [this] {
|
||||
_workerInferRequest = MultiDeviceExecutableNetwork::_thisWorkerInferRequest;
|
||||
_inferRequest->SetBlobsToAnotherRequest(_workerInferRequest->_inferRequest);
|
||||
// if the request is coming with device-specific remote blobs make sure it is scheduled to the specific device only:
|
||||
{ /*TaskExecutor*/ std::make_shared<ImmediateExecutor>(), /*task*/ [this] {
|
||||
// by default, no preferred device:
|
||||
_multiDeviceExecutableNetwork->_thisPreferredDeviceName = "";
|
||||
// if any input is remote (e.g. was set with SetBlob), let' use the corresponding device
|
||||
for (const auto &it : _multiDeviceExecutableNetwork->GetInputsInfo()) {
|
||||
Blob::Ptr b;
|
||||
_inferRequest->GetBlob(it.first.c_str(), b);
|
||||
auto r = b->as<RemoteBlob>();
|
||||
if (r) {
|
||||
const auto name = r->getDeviceName();
|
||||
const auto res = std::find_if(
|
||||
_multiDeviceExecutableNetwork->_devicePrioritiesInitial.cbegin(),
|
||||
_multiDeviceExecutableNetwork->_devicePrioritiesInitial.cend(),
|
||||
[&name](const MultiDevicePlugin::DeviceInformation& d){ return d.deviceName == name; });
|
||||
if (_multiDeviceExecutableNetwork->_devicePrioritiesInitial.cend() == res) {
|
||||
THROW_IE_EXCEPTION << "None of the devices (for which current MULTI-device configuration was "
|
||||
"initialized) supports a remote blob created on the device named " << name;
|
||||
|
||||
} else {
|
||||
// it is ok to take the c_str() here (as pointed in the multi_device_exec_network.hpp we need to use const char*)
|
||||
// as the original strings are from the "persistent" vector (with the right lifetime)
|
||||
_multiDeviceExecutableNetwork->_thisPreferredDeviceName = res->deviceName.c_str();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}},
|
||||
{std::make_shared<ThisRequestExecutor>(this), [this] {
|
||||
auto status = _workerInferRequest->_status;
|
||||
if (InferenceEngine::StatusCode::OK != status) {
|
||||
if (nullptr != InferenceEngine::CurrentException()) {
|
||||
std::rethrow_exception(InferenceEngine::CurrentException());
|
||||
} else {
|
||||
THROW_IE_EXCEPTION << InferenceEngine::details::as_status << status;
|
||||
}
|
||||
}
|
||||
if (_needPerfCounters) {
|
||||
_perfMap = _workerInferRequest->_inferRequest.GetPerformanceCounts();
|
||||
}
|
||||
// as the scheduling algo may select any device, this stage accepts the scheduling decision (actual workerRequest)
|
||||
// then sets the device-agnostic blobs to the actual (device-specific) request
|
||||
{
|
||||
/*TaskExecutor*/ _multiDeviceExecutableNetwork, /*task*/ [this] {
|
||||
_workerInferRequest = MultiDeviceExecutableNetwork::_thisWorkerInferRequest;
|
||||
_inferRequest->SetBlobsToAnotherRequest(_workerInferRequest->_inferRequest);
|
||||
}},
|
||||
// final task in the pipeline:
|
||||
{ /*TaskExecutor*/std::make_shared<ThisRequestExecutor>(this), /*task*/ [this] {
|
||||
auto status = _workerInferRequest->_status;
|
||||
if (InferenceEngine::StatusCode::OK != status) {
|
||||
if (nullptr != InferenceEngine::CurrentException())
|
||||
std::rethrow_exception(InferenceEngine::CurrentException());
|
||||
else
|
||||
THROW_IE_EXCEPTION << InferenceEngine::details::as_status << status;
|
||||
}
|
||||
if (_needPerfCounters)
|
||||
_perfMap = _workerInferRequest->_inferRequest.GetPerformanceCounts();
|
||||
}}
|
||||
};
|
||||
}
|
||||
|
@ -3,9 +3,7 @@
|
||||
//
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
@ -27,6 +25,8 @@ namespace MultiDevicePlugin {
|
||||
using namespace InferenceEngine;
|
||||
|
||||
thread_local MultiDeviceExecutableNetwork::WorkerInferRequest* MultiDeviceExecutableNetwork::_thisWorkerInferRequest = nullptr;
|
||||
// TODO: revert to the plain variable (see header file), when we moved to the next CentOS 8.x in our support matrix
|
||||
thread_local const char* MultiDeviceExecutableNetwork::_thisPreferredDeviceName = "";
|
||||
|
||||
struct IdleGuard {
|
||||
explicit IdleGuard(MultiDeviceExecutableNetwork::WorkerInferRequest* workerInferRequestPtr,
|
||||
@ -68,7 +68,7 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap<Infer
|
||||
unsigned int optimalNum = 0;
|
||||
try {
|
||||
optimalNum = network.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
|
||||
} catch (const details::InferenceEngineException &iie) {
|
||||
} catch (const InferenceEngine::details::InferenceEngineException &iie) {
|
||||
THROW_IE_EXCEPTION
|
||||
<< "Every device used with the Multi-Device should "
|
||||
<< "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
|
||||
@ -79,6 +79,7 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap<Infer
|
||||
auto& workerRequests = _workerRequests[device];
|
||||
auto& idleWorkerRequests = _idleWorkerRequests[device];
|
||||
workerRequests.resize(numRequests);
|
||||
_inferPipelineTasksDeviceSpecific[device] = std::unique_ptr<ThreadSafeQueue<Task>>(new ThreadSafeQueue<Task>);
|
||||
auto* idleWorkerRequestsPtr = &(idleWorkerRequests);
|
||||
idleWorkerRequests.set_capacity(numRequests);
|
||||
for (auto&& workerRequest : workerRequests) {
|
||||
@ -95,24 +96,27 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap<Infer
|
||||
}
|
||||
// try to return the request to the idle list (fails if the overall object destruction has began)
|
||||
if (idleGuard.Release()->try_push(workerRequestPtr)) {
|
||||
// let's try to pop a task, as we know there is at least one idle request, schedule if succeeded
|
||||
// if no device-agnostic tasks, let's try pop the device specific task, schedule if succeeded
|
||||
Task t;
|
||||
// try pop the task, as we know there is at least one idle request
|
||||
if (_inferPipelineTasks.try_pop(t)) {
|
||||
// if succeeded, let's schedule that
|
||||
if (_inferPipelineTasks.try_pop(t))
|
||||
ScheduleToWorkerInferRequest(std::move(t));
|
||||
}
|
||||
else if (_inferPipelineTasksDeviceSpecific[device]->try_pop(t))
|
||||
ScheduleToWorkerInferRequest(std::move(t), device);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MultiDeviceExecutableNetwork::ScheduleToWorkerInferRequest(Task inferPipelineTask) {
|
||||
void MultiDeviceExecutableNetwork::ScheduleToWorkerInferRequest(Task inferPipelineTask, DeviceName preferred_device) {
|
||||
auto devices = [&] {
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
return _devicePriorities;
|
||||
}();
|
||||
for (auto&& device : devices) {
|
||||
if (!preferred_device.empty() && (device.deviceName != preferred_device))
|
||||
continue;
|
||||
WorkerInferRequest* workerRequestPtr = nullptr;
|
||||
NotBusyWorkerRequests& idleWorkerRequests = _idleWorkerRequests[device.deviceName];
|
||||
if (idleWorkerRequests.try_pop(workerRequestPtr)) {
|
||||
@ -126,12 +130,15 @@ void MultiDeviceExecutableNetwork::ScheduleToWorkerInferRequest(Task inferPipeli
|
||||
return;
|
||||
}
|
||||
}
|
||||
// no vacant requests this time, storing the task to the queue
|
||||
_inferPipelineTasks.push(std::move(inferPipelineTask));
|
||||
// no vacant requests this time, storing the task to the respective queue
|
||||
if (!preferred_device.empty())
|
||||
_inferPipelineTasksDeviceSpecific[preferred_device]->push(std::move(inferPipelineTask));
|
||||
else
|
||||
_inferPipelineTasks.push(std::move(inferPipelineTask));
|
||||
}
|
||||
|
||||
void MultiDeviceExecutableNetwork::run(Task inferPipelineTask) {
|
||||
ScheduleToWorkerInferRequest(std::move(inferPipelineTask));
|
||||
ScheduleToWorkerInferRequest(std::move(inferPipelineTask), _thisPreferredDeviceName);
|
||||
}
|
||||
|
||||
MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
|
||||
@ -149,6 +156,26 @@ MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
|
||||
_workerRequests.clear();
|
||||
}
|
||||
|
||||
RemoteContext::Ptr MultiDeviceExecutableNetwork::GetContext() const {
|
||||
auto devices = [&] {
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
return _devicePriorities;
|
||||
}();
|
||||
|
||||
std::string devices_names;
|
||||
for (auto&& device : devices) {
|
||||
devices_names += device.deviceName + " ";
|
||||
const auto& n = _networksPerDevice.at(device.deviceName);
|
||||
try {
|
||||
return n.GetContext();
|
||||
} catch (const NotImplemented& ex) {
|
||||
}
|
||||
}
|
||||
THROW_IE_EXCEPTION << InferenceEngine::details::as_status << StatusCode::NOT_IMPLEMENTED
|
||||
<< NOT_IMPLEMENTED_str << "None of the devices in the MULTI has an associated remote context."
|
||||
<< "Current list of devices allowed via the DEVICE_PRIORITIES config: " << devices_names;
|
||||
}
|
||||
|
||||
InferenceEngine::InferRequestInternal::Ptr MultiDeviceExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
|
||||
InferenceEngine::OutputsDataMap networkOutputs) {
|
||||
auto num = _numRequestsCreated++;
|
||||
@ -230,7 +257,7 @@ InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetMetric(const std::st
|
||||
for (auto n : _networksPerDevice) {
|
||||
try {
|
||||
res += n.second.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
|
||||
} catch (const details::InferenceEngineException &iie) {
|
||||
} catch (const InferenceEngine::details::InferenceEngineException &iie) {
|
||||
THROW_IE_EXCEPTION
|
||||
<< "Every device used with the Multi-Device should "
|
||||
<< "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
|
||||
|
@ -117,17 +117,22 @@ public:
|
||||
InferenceEngine::IInferRequest::Ptr CreateInferRequest() override;
|
||||
InferenceEngine::InferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
|
||||
InferenceEngine::OutputsDataMap networkOutputs) override;
|
||||
InferenceEngine::RemoteContext::Ptr GetContext() const override;
|
||||
~MultiDeviceExecutableNetwork() override;
|
||||
|
||||
void ScheduleToWorkerInferRequest(InferenceEngine::Task);
|
||||
void ScheduleToWorkerInferRequest(InferenceEngine::Task, DeviceName preferred_device = "");
|
||||
|
||||
static thread_local WorkerInferRequest* _thisWorkerInferRequest;
|
||||
std::atomic_bool _terminate = {false};
|
||||
std::mutex _mutex;
|
||||
// have to use the const char* ptr rather than std::string due to a bug in old gcc versions,
|
||||
// the bug is e.g. manifesting on the old CentOS (and it's 4.8.x gcc) used in our testing
|
||||
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81880
|
||||
static thread_local const char* _thisPreferredDeviceName;
|
||||
mutable std::mutex _mutex;
|
||||
std::vector<DeviceInformation> _devicePriorities;
|
||||
const std::vector<DeviceInformation> _devicePrioritiesInitial;
|
||||
DeviceMap<InferenceEngine::ExecutableNetwork> _networksPerDevice;
|
||||
ThreadSafeQueue<InferenceEngine::Task> _inferPipelineTasks;
|
||||
DeviceMap<std::unique_ptr<ThreadSafeQueue<InferenceEngine::Task>>> _inferPipelineTasksDeviceSpecific;
|
||||
DeviceMap<NotBusyWorkerRequests> _idleWorkerRequests;
|
||||
DeviceMap<std::vector<WorkerInferRequest>> _workerRequests;
|
||||
std::unordered_map<std::string, InferenceEngine::Parameter> _config;
|
||||
|
@ -64,7 +64,7 @@ public:
|
||||
|
||||
void Export(const std::string& modelFileName) override {
|
||||
(void)modelFileName;
|
||||
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
|
||||
THROW_IE_EXCEPTION << InferenceEngine::details::as_status << StatusCode::NOT_IMPLEMENTED << NOT_IMPLEMENTED_str;
|
||||
}
|
||||
|
||||
void Export(std::ostream& networkModel) override {
|
||||
@ -76,7 +76,7 @@ public:
|
||||
}
|
||||
|
||||
CNNNetwork GetExecGraphInfo() override {
|
||||
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
|
||||
THROW_IE_EXCEPTION << InferenceEngine::details::as_status << StatusCode::NOT_IMPLEMENTED << NOT_IMPLEMENTED_str;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -89,7 +89,7 @@ public:
|
||||
}
|
||||
|
||||
std::vector<IVariableStateInternal::Ptr> QueryState() override {
|
||||
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
|
||||
THROW_IE_EXCEPTION << InferenceEngine::details::as_status << StatusCode::NOT_IMPLEMENTED << NOT_IMPLEMENTED_str;
|
||||
}
|
||||
|
||||
void SetConfig(const std::map<std::string, Parameter>& config) override {
|
||||
@ -107,11 +107,11 @@ public:
|
||||
|
||||
Parameter GetMetric(const std::string& name) const override {
|
||||
(void)name;
|
||||
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
|
||||
THROW_IE_EXCEPTION << InferenceEngine::details::as_status << StatusCode::NOT_IMPLEMENTED << NOT_IMPLEMENTED_str;
|
||||
}
|
||||
|
||||
RemoteContext::Ptr GetContext() const override {
|
||||
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
|
||||
THROW_IE_EXCEPTION << InferenceEngine::details::as_status << StatusCode::NOT_IMPLEMENTED << NOT_IMPLEMENTED_str;
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -123,7 +123,7 @@ protected:
|
||||
*/
|
||||
virtual void ExportImpl(std::ostream& networkModel) {
|
||||
(void)networkModel;
|
||||
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
|
||||
THROW_IE_EXCEPTION << InferenceEngine::details::as_status << StatusCode::NOT_IMPLEMENTED << NOT_IMPLEMENTED_str;
|
||||
}
|
||||
|
||||
InferenceEngine::InputsDataMap _networkInputs; //!< Holds infromation about network inputs info
|
||||
|
@ -76,7 +76,8 @@ public:
|
||||
}
|
||||
if (!data) THROW_IE_EXCEPTION << NOT_ALLOCATED_str << "Failed to set empty blob with name: \'" << name << "\'";
|
||||
const bool compoundBlobPassed = data->is<CompoundBlob>();
|
||||
if (!compoundBlobPassed && data->buffer() == nullptr)
|
||||
const bool remoteBlobPassed = data->is<RemoteBlob>();
|
||||
if (!compoundBlobPassed && !remoteBlobPassed && data->buffer() == nullptr)
|
||||
THROW_IE_EXCEPTION << "Input data was not allocated. Input name: \'" << name << "\'";
|
||||
if (data->size() == 0) {
|
||||
THROW_IE_EXCEPTION << "Input data is empty. Input name: \'" << name << "\'";
|
||||
@ -348,7 +349,8 @@ protected:
|
||||
if (refSize != blob->size()) {
|
||||
THROW_IE_EXCEPTION << strNotMatched + ": got " << blob->size() << " expecting " << refSize;
|
||||
}
|
||||
if (blob->buffer() == nullptr) THROW_IE_EXCEPTION << strNotAllocated;
|
||||
const bool remoteBlobPassed = blob->is<RemoteBlob>();
|
||||
if (!remoteBlobPassed && blob->buffer() == nullptr) THROW_IE_EXCEPTION << strNotAllocated;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -18,4 +18,5 @@ endif()
|
||||
|
||||
if (ENABLE_MYRIAD)
|
||||
add_subdirectory(myriad)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -0,0 +1,15 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "multi/multi_remote_blob_tests.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
const std::vector<DevicesNamesAndSupportPair> device_names_and_support_for_remote_blobs {
|
||||
{{CPU}, false}, // CPU via MULTI
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_RemoteBlobMultiCPU, MultiDevice_SupportTest,
|
||||
::testing::ValuesIn(device_names_and_support_for_remote_blobs), MultiDevice_SupportTest::getTestCaseName);
|
@ -0,0 +1,57 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "multi/multi_remote_blob_tests.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
const std::vector<DevicesNamesAndSupportPair> device_names_and_support_for_remote_blobs {
|
||||
{{GPU}, true}, // GPU via MULTI,
|
||||
#if ENABLE_MKL_DNN
|
||||
{{GPU, CPU}, true}, // GPU+CPU
|
||||
{{CPU, GPU}, true}, // CPU+GPU
|
||||
#endif
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_RemoteBlobMultiGPU, MultiDevice_SupportTest,
|
||||
::testing::ValuesIn(device_names_and_support_for_remote_blobs), MultiDevice_SupportTest::getTestCaseName);
|
||||
|
||||
TEST_P(MultiDevice_Test, cannotInferRemoteBlobIfNotInitializedForDevice) {
|
||||
InferenceEngine::CNNNetwork net;
|
||||
net = CNNNetwork(fn_ptr);
|
||||
auto ie = PluginCache::get().ie();
|
||||
// load a network to the GPU to make sure we have a remote context
|
||||
auto exec_net = ie->LoadNetwork(net, GPU);
|
||||
auto ctx = exec_net.GetContext();
|
||||
|
||||
const InferenceEngine::ConstInputsDataMap inputInfo = exec_net.GetInputsInfo();
|
||||
auto& first_input_name = inputInfo.begin()->first;
|
||||
auto& first_input = inputInfo.begin()->second;
|
||||
auto rblob = InferenceEngine::make_shared_blob(first_input->getTensorDesc(), ctx);
|
||||
rblob->allocate();
|
||||
|
||||
ExecutableNetwork exec_net_multi;
|
||||
try {
|
||||
exec_net_multi = ie->LoadNetwork(net, device_names);
|
||||
} catch(...) {
|
||||
// device is unavailable (e.g. for the "second GPU" test) or other (e.g. env) issues not related to the test
|
||||
return;
|
||||
}
|
||||
InferRequest req = exec_net_multi.CreateInferRequest();
|
||||
ASSERT_NE((std::shared_ptr<InferenceEngine::IInferRequest>)req, nullptr);
|
||||
ASSERT_NO_THROW(req.SetBlob(first_input_name, rblob));
|
||||
ASSERT_NO_THROW(req.StartAsync());
|
||||
ASSERT_THROW(req.Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY), InferenceEngine::details::InferenceEngineException);
|
||||
}
|
||||
|
||||
const std::vector<DevicesNames> device_names_and_support_for_remote_blobs2 {
|
||||
#if ENABLE_MKL_DNN
|
||||
{CPU}, // stand-alone CPU via MULTI (no GPU), no OCL context
|
||||
#endif
|
||||
{"GPU.1"}, // another GPU (the test will test its presence), different OCL contexts
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_RemoteBlobMultiInitializedWithoutGPU, MultiDevice_Test,
|
||||
::testing::ValuesIn(device_names_and_support_for_remote_blobs2), MultiDevice_Test::getTestCaseName);
|
@ -0,0 +1,18 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "multi/multi_remote_blob_tests.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
const std::vector<DevicesNamesAndSupportPair> device_names_and_support_for_remote_blobs {
|
||||
{{MYRIAD}, false}, // MYX via MULTI
|
||||
#if ENABLE_MKL_DNN
|
||||
{{CPU, MYRIAD}, false}, // CPU+MYX
|
||||
#endif
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_RemoteBlobMultiMyriad, MultiDevice_SupportTest,
|
||||
::testing::ValuesIn(device_names_and_support_for_remote_blobs), MultiDevice_SupportTest::getTestCaseName);
|
@ -23,6 +23,7 @@
|
||||
#include <functional_test_utils/skip_tests_config.hpp>
|
||||
#include <common_test_utils/common_utils.hpp>
|
||||
#include <common_test_utils/test_assertions.hpp>
|
||||
#include <cpp_interfaces/exception2status.hpp>
|
||||
|
||||
#ifdef ENABLE_UNICODE_PATH_SUPPORT
|
||||
#include <iostream>
|
||||
@ -60,16 +61,18 @@ namespace BehaviorTestsDefinitions {
|
||||
{ \
|
||||
try { \
|
||||
__VA_ARGS__; \
|
||||
} catch(InferenceEngine::details::InferenceEngineException ieException) { \
|
||||
} catch(InferenceEngine::details::InferenceEngineException& ieException) { \
|
||||
auto notImplementedExceptionIsThrown = \
|
||||
std::string::npos != std::string {ieException.what()} \
|
||||
.find(std::string{"[NOT_IMPLEMENTED] "}); \
|
||||
.find(NOT_IMPLEMENTED_str); \
|
||||
if (notImplementedExceptionIsThrown) { \
|
||||
GTEST_SKIP(); \
|
||||
} else { \
|
||||
FAIL() << "thrown from expression: " # __VA_ARGS__ << std::endl \
|
||||
<< "what: " << ieException.what(); \
|
||||
} \
|
||||
} catch (const InferenceEngine::NotImplemented& ex) { \
|
||||
GTEST_SKIP(); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
@ -61,7 +61,7 @@ TEST_P(ExecGraphTests, CheckExecGraphInfoBeforeExecution) {
|
||||
InferenceEngine::CNNNetwork execGraph;
|
||||
if (targetDevice != CommonTestUtils::DEVICE_MULTI && targetDevice != CommonTestUtils::DEVICE_GNA) {
|
||||
// Load CNNNetwork to target plugins
|
||||
auto execNet = ie->LoadNetwork(cnnNet, targetDevice);
|
||||
auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration);
|
||||
ASSERT_NO_THROW(execGraph = execNet.GetExecGraphInfo());
|
||||
// Create InferRequest
|
||||
InferenceEngine::InferRequest req;
|
||||
@ -135,8 +135,8 @@ TEST_P(ExecGraphTests, CheckExecGraphInfoBeforeExecution) {
|
||||
ASSERT_GE(layer.second, 0);
|
||||
}
|
||||
} else {
|
||||
ASSERT_THROW(ie->LoadNetwork(cnnNet, targetDevice).GetExecGraphInfo(),
|
||||
InferenceEngine::details::InferenceEngineException);
|
||||
ASSERT_THROW(ie->LoadNetwork(cnnNet, targetDevice, configuration).GetExecGraphInfo(),
|
||||
InferenceEngine::NotImplemented);
|
||||
}
|
||||
}
|
||||
|
||||
@ -148,7 +148,7 @@ TEST_P(ExecGraphTests, CheckExecGraphInfoAfterExecution) {
|
||||
InferenceEngine::CNNNetwork execGraph;
|
||||
if (targetDevice != CommonTestUtils::DEVICE_MULTI && targetDevice != CommonTestUtils::DEVICE_GNA) {
|
||||
// Load CNNNetwork to target plugins
|
||||
auto execNet = ie->LoadNetwork(cnnNet, targetDevice);
|
||||
auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration);
|
||||
ASSERT_NO_THROW(execGraph = execNet.GetExecGraphInfo());
|
||||
// Create InferRequest
|
||||
InferenceEngine::InferRequest req;
|
||||
@ -235,8 +235,8 @@ TEST_P(ExecGraphTests, CheckExecGraphInfoAfterExecution) {
|
||||
ASSERT_GE(layer.second, 0);
|
||||
}
|
||||
} else {
|
||||
ASSERT_THROW(ie->LoadNetwork(cnnNet, targetDevice).GetExecGraphInfo(),
|
||||
InferenceEngine::details::InferenceEngineException);
|
||||
ASSERT_THROW(ie->LoadNetwork(cnnNet, targetDevice, configuration).GetExecGraphInfo(),
|
||||
InferenceEngine::NotImplemented);
|
||||
}
|
||||
}
|
||||
|
||||
@ -252,7 +252,7 @@ TEST_P(ExecGraphTests, CheckExecGraphInfoSerialization) {
|
||||
InferenceEngine::CNNNetwork execGraph;
|
||||
if (targetDevice != CommonTestUtils::DEVICE_MULTI && targetDevice != CommonTestUtils::DEVICE_GNA) {
|
||||
// Load CNNNetwork to target plugins
|
||||
auto execNet = ie->LoadNetwork(cnnNet, targetDevice);
|
||||
auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration);
|
||||
ASSERT_NO_THROW(execGraph = execNet.GetExecGraphInfo());
|
||||
// Create InferRequest
|
||||
InferenceEngine::InferRequest req;
|
||||
@ -261,8 +261,8 @@ TEST_P(ExecGraphTests, CheckExecGraphInfoSerialization) {
|
||||
ASSERT_EQ(0, std::remove(out_xml_path.c_str()));
|
||||
ASSERT_EQ(0, std::remove(out_bin_path.c_str()));
|
||||
} else {
|
||||
ASSERT_THROW(ie->LoadNetwork(cnnNet, targetDevice).GetExecGraphInfo(),
|
||||
InferenceEngine::details::InferenceEngineException);
|
||||
ASSERT_THROW(ie->LoadNetwork(cnnNet, targetDevice, configuration).GetExecGraphInfo(),
|
||||
InferenceEngine::NotImplemented);
|
||||
}
|
||||
}
|
||||
} // namespace BehaviorTestsDefinitions
|
@ -0,0 +1,36 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "multi/multi_helpers.hpp"
|
||||
#include "functional_test_utils/plugin_cache.hpp"
|
||||
|
||||
TEST_P(MultiDevice_SupportTest, canCreateContextThenRequestThenBlobsAndInfer) {
|
||||
InferenceEngine::CNNNetwork net;
|
||||
net = CNNNetwork(fn_ptr);
|
||||
net.getInputsInfo().begin()->second->setLayout(Layout::NCHW);
|
||||
net.getInputsInfo().begin()->second->setPrecision(Precision::U8);
|
||||
|
||||
auto ie = PluginCache::get().ie();
|
||||
|
||||
auto exec_net = ie->LoadNetwork(net, device_names);
|
||||
if (expected_status) {
|
||||
InferenceEngine::RemoteContext::Ptr ctx;
|
||||
ASSERT_NE(ctx = exec_net.GetContext(), nullptr);
|
||||
InferRequest req = exec_net.CreateInferRequest();
|
||||
ASSERT_NE((std::shared_ptr<InferenceEngine::IInferRequest>)req, nullptr);
|
||||
const InferenceEngine::ConstInputsDataMap inputInfo = exec_net.GetInputsInfo();
|
||||
for (auto i : inputInfo) {
|
||||
auto rblob = InferenceEngine::make_shared_blob(i.second->getTensorDesc(), ctx);
|
||||
rblob->allocate();
|
||||
req.SetBlob(i.first, rblob);
|
||||
}
|
||||
ASSERT_NO_THROW(req.StartAsync());
|
||||
ASSERT_EQ(req.Wait(IInferRequest::RESULT_READY), StatusCode::OK);
|
||||
|
||||
} else {
|
||||
ASSERT_THROW(exec_net.GetContext(), InferenceEngine::NotImplemented);
|
||||
}
|
||||
}
|
64
inference-engine/tests/ie_test_utils/multi/multi_helpers.hpp
Normal file
64
inference-engine/tests/ie_test_utils/multi/multi_helpers.hpp
Normal file
@ -0,0 +1,64 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "multi-device/multi_device_config.hpp"
|
||||
#include "common_test_utils/test_common.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
#include "ngraph_functions/subgraph_builders.hpp"
|
||||
|
||||
using namespace ::testing;
|
||||
using namespace InferenceEngine;
|
||||
|
||||
static std::string getDeviceStringWithMulti(std::vector<std::string> names) {
|
||||
std::string allDevices = "MULTI:";
|
||||
for (auto && device : names) {
|
||||
allDevices += device;
|
||||
allDevices += ((device == names[names.size()-1]) ? "" : ",");
|
||||
}
|
||||
return allDevices;
|
||||
}
|
||||
using DeviceName = std::string;
|
||||
using DevicesNames = std::vector<DeviceName>;
|
||||
using DevicesNamesAndSupportPair = std::pair<DevicesNames, bool>;
|
||||
|
||||
class MultiDevice_Test : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<DevicesNames> {
|
||||
void SetUp() override {
|
||||
device_names = getDeviceStringWithMulti(this->GetParam());
|
||||
fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
|
||||
}
|
||||
public:
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<DevicesNames> &obj) {
|
||||
auto s = getDeviceStringWithMulti(obj.param);
|
||||
std::replace(s.begin(), s.end(), ',', '_');
|
||||
return "device_names_" + s;
|
||||
}
|
||||
protected:
|
||||
std::string device_names;
|
||||
std::shared_ptr<ngraph::Function> fn_ptr;
|
||||
};
|
||||
|
||||
class MultiDevice_SupportTest : public CommonTestUtils::TestsCommon, public testing::WithParamInterface<DevicesNamesAndSupportPair> {
|
||||
void SetUp() override {
|
||||
device_names = getDeviceStringWithMulti(this->GetParam().first);
|
||||
expected_status = this->GetParam().second;
|
||||
fn_ptr = ngraph::builder::subgraph::makeSplitMultiConvConcat();
|
||||
}
|
||||
public:
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<DevicesNamesAndSupportPair> &obj) {
|
||||
auto s = getDeviceStringWithMulti(obj.param.first);
|
||||
std::replace(s.begin(), s.end(), ',', '_');
|
||||
return "device_names_" + s;
|
||||
}
|
||||
protected:
|
||||
std::string device_names;
|
||||
bool expected_status;
|
||||
std::shared_ptr<ngraph::Function> fn_ptr;
|
||||
};
|
||||
#define MULTI CommonTestUtils::DEVICE_MULTI
|
||||
#define CPU CommonTestUtils::DEVICE_CPU
|
||||
#define GPU CommonTestUtils::DEVICE_GPU
|
||||
#define MYRIAD CommonTestUtils::DEVICE_MYRIAD
|
Loading…
Reference in New Issue
Block a user