change AUTO default hint to Latency (#14236)

* change AUTO default hint to Latency

* Change the comment from tput to latency according to wangyang's opinion

* fix testcase for MULTI, the hint default value returned by MULTI is throughput

* Remove the redundant testcase and modify the name of the testcase that returns the default value of hint

* Code optimization according to bell's opinion, add comments to testcase

* Correct the comments of testcase

* When user sets num_streams, AUTO/MULTI does not set the default hint to HW plugin

* Fix the problem that smoke_AUTO_MULTI_ReturnDefaultHintTest fails to run

* add num_streams and default hint mock testcase

* add auto default perf hint mock testcase
This commit is contained in:
guozhong wang 2022-12-06 11:41:43 +08:00 committed by GitHub
parent e1acaf67c5
commit 796ce53371
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 604 additions and 63 deletions

View File

@ -156,8 +156,7 @@ void AutoSchedule::init(const ScheduleContext::Ptr& sContext) {
if (CPUIter != _autoSContext->_devicePriorities.end()) {
_loadContext[CPU].isEnabled = true;
_loadContext[CPU].deviceInfo = *CPUIter;
_loadContext[CPU].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] =
IE::PluginConfigParams::LATENCY;
_loadContext[CPU].deviceInfo.config[CONFIG_KEY(PERFORMANCE_HINT)] = IE::PluginConfigParams::LATENCY;
_loadContext[CPU].workName = "CPU_HELP";
LOG_INFO_TAG("will load CPU for accelerator");
} else {

View File

@ -64,28 +64,33 @@ std::vector<DeviceInformation> MultiDeviceInferencePlugin::ParseMetaDevices(cons
// parsing the string and splitting to tokens
std::vector<std::string> devicesWithRequests = _pluginConfig.ParsePrioritiesDevices(priorities);
auto setTputAsDefault = [&](const std::string& targetDevice,
std::map<std::string, std::string>& deviceConfig,
const std::map<std::string, std::string>& mergedConfig) {
auto setDefaultHint = [&](const std::string& targetDevice,
std::map<std::string, std::string>& deviceConfig,
const std::map<std::string, std::string>& mergedConfig) {
auto isSetPerHint = mergedConfig.find(PluginConfigParams::KEY_PERFORMANCE_HINT) != mergedConfig.end();
if (GetName() == "AUTO" && !isSetPerHint && mergedConfig.find(targetDevice) == mergedConfig.end()) {
// setting tput as the default performance mode if no hints setting for AUTO plugin and no properties
// specified for target device.
deviceConfig[PluginConfigParams::KEY_PERFORMANCE_HINT] = PluginConfigParams::THROUGHPUT;
auto isSetDeviceProperties = mergedConfig.find(targetDevice) != mergedConfig.end();
auto isSetNumStreams = deviceConfig.find(ov::num_streams.name()) != deviceConfig.end();
if (GetName() == "AUTO" && !isSetPerHint && !isSetDeviceProperties && !isSetNumStreams) {
// setting latency as the default performance mode if
// 1. no hints setting for AUTO plugin
// 2. no ov::device::properties(secondary properties) setting for target device
// 3. no ov::num_streams setting for target device
deviceConfig[PluginConfigParams::KEY_PERFORMANCE_HINT] = PluginConfigParams::LATENCY;
return;
}
// set TPUT for MULTI if no above properties were set by user
if (GetName() == "MULTI") {
if (isSetPerHint || mergedConfig.find(targetDevice) != mergedConfig.end())
return;
for (auto&& kvp : mergedConfig) {
if (kvp.first == ov::affinity || kvp.first == ov::num_streams ||
kvp.first == ov::inference_num_threads) {
return;
}
auto isSetAffinity = mergedConfig.find(ov::affinity.name()) != mergedConfig.end();
auto isSetNumThreads = mergedConfig.find(ov::inference_num_threads.name()) != mergedConfig.end();
if (!isSetPerHint && !isSetAffinity && !isSetNumThreads && !isSetDeviceProperties && !isSetNumStreams) {
// setting tput as the default performance mode if
// 1. no hints setting for MULTI plugin
// 2. no affinity setting for MULTI plugin
// 3. no inference_num_threads setting for MULTI plugin
// 4. no ov::device::properties(secondary properties) setting for target device
// 5. no ov::num_streams setting for target device
deviceConfig[PluginConfigParams::KEY_PERFORMANCE_HINT] = PluginConfigParams::THROUGHPUT;
}
deviceConfig[PluginConfigParams::KEY_PERFORMANCE_HINT] = PluginConfigParams::THROUGHPUT;
}
};
@ -100,7 +105,7 @@ std::vector<DeviceInformation> MultiDeviceInferencePlugin::ParseMetaDevices(cons
tconfig[PluginConfigParams::KEY_DEVICE_ID] = deviceIDLocal;
}
auto deviceConfig = GetCore()->GetSupportedConfig(deviceName, tconfig);
setTputAsDefault(deviceName, deviceConfig, tconfig);
setDefaultHint(deviceName, deviceConfig, tconfig);
return deviceConfig;
};
@ -329,18 +334,25 @@ IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetworkImpl(cons
// updateFromMap will check config valid
loadConfig.UpdateFromMap(config, GetName(), true);
auto fullConfig = loadConfig._keyConfigMap;
bool workModeAuto = GetName() == "AUTO";
// Remove the performance hint if no setting to this property from user.
if (!loadConfig._isSetPerHint) {
fullConfig.erase(PluginConfigParams::KEY_PERFORMANCE_HINT);
// set performance hint to 'THROUGHPUT' model for AutoExecutable Network.
loadConfig._perfHintsConfig.SetConfig(PluginConfigParams::KEY_PERFORMANCE_HINT, PluginConfigParams::THROUGHPUT);
if (workModeAuto) {
// set performance hint to 'LATENCY' model for AutoExecutable Network.
loadConfig._perfHintsConfig.SetConfig(PluginConfigParams::KEY_PERFORMANCE_HINT,
PluginConfigParams::LATENCY);
} else {
// set performance hint to 'THROUGHPUT' model for MultiExecutable Network.
loadConfig._perfHintsConfig.SetConfig(PluginConfigParams::KEY_PERFORMANCE_HINT,
PluginConfigParams::THROUGHPUT);
}
}
if (!loadConfig._isSetCacheDir)
fullConfig.erase(CONFIG_KEY(CACHE_DIR));
// collect the settings that are applicable to the devices we are loading the network to
std::unordered_map<std::string, InferenceEngine::Parameter> multiNetworkConfig;
std::vector<DeviceInformation> metaDevices;
bool workModeAuto = GetName() == "AUTO";
auto priorities = fullConfig.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
// If the user sets the property, insert the property into the deviceConfig
auto insertPropToConfig = [&](std::string property,

View File

@ -164,6 +164,7 @@ struct PluginConfig {
_devicePriority = kvp.second;
} else if (std::find(perf_hints_configs.begin(), perf_hints_configs.end(), kvp.first) != perf_hints_configs.end()) {
_perfHintsConfig.SetConfig(kvp.first, kvp.second);
// if first level property has perf_hint setting
if (kvp.first == ov::hint::performance_mode.name())
_isSetPerHint = true;
} else if (_availableDevices.end() != std::find(_availableDevices.begin(),
@ -172,9 +173,6 @@ struct PluginConfig {
// AUTO and MULTI can accept secondary properties on calling both core::compile_model() and
// core::set_property().
_passThroughConfig.emplace(kvp.first, kvp.second);
// Not setting performance mode to 'THROUGHPUT' as the default value if any secondary properties
// appears in the configuration.
_isSetPerHint = true;
} else if (kvp.first.find("AUTO_") == 0) {
_passThroughConfig.emplace(kvp.first, kvp.second);
} else if (kvp.first == ov::cache_dir.name()) {

View File

@ -281,35 +281,30 @@ INSTANTIATE_TEST_SUITE_P(
smoke_OVClassLoadNetworkTest, OVClassLoadNetworkTest,
::testing::Values("CPU"));
const std::vector<ov::AnyMap> default_properties = {{ov::device::priorities("CPU")}};
const std::vector<ov::AnyMap> auto_default_properties = {{}};
INSTANTIATE_TEST_SUITE_P(smoke_OVClassLoadNetworkWithDefaultPropertiesTest,
OVClassLoadNetworkWithDefaultPropertiesTest,
::testing::Combine(::testing::Values(CommonTestUtils::DEVICE_AUTO, CommonTestUtils::DEVICE_MULTI),
::testing::ValuesIn(default_properties)));
INSTANTIATE_TEST_SUITE_P(smoke_AUTO_OVClassLoadNetworkWithDefaultPropertiesTest,
OVClassLoadNetworkWithDefaultPropertiesTest,
const std::vector<ov::AnyMap> auto_multi_default_properties = {{}, {ov::hint::allow_auto_batching(true)}};
INSTANTIATE_TEST_SUITE_P(smoke_AUTO_MULTI_ReturnDefaultHintTest,
OVClassLoadNetWorkReturnDefaultHintTest,
::testing::Combine(::testing::Values("AUTO:CPU", "MULTI:CPU"),
::testing::ValuesIn(auto_default_properties)));
const std::vector<ov::AnyMap> default_incorrect_properties = {
{ov::device::priorities("CPU"), ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)},
{ov::device::priorities("CPU"), ov::hint::performance_mode(ov::hint::PerformanceMode::UNDEFINED)},
{ov::device::priorities("CPU"), ov::device::properties("CPU", ov::hint::allow_auto_batching(true))}
};
const std::vector<ov::AnyMap> default_multi_incorrect_properties = {
{ov::device::priorities("CPU"), ov::affinity(ov::Affinity::NONE)},
{ov::device::priorities("CPU"), ov::num_streams(ov::streams::AUTO)},
{ov::device::priorities("CPU"), ov::inference_num_threads(1)}
};
INSTANTIATE_TEST_SUITE_P(smoke_OVClassLoadNetworkWithDefaultIncorrectPropertiesTest,
OVClassLoadNetworkWithDefaultIncorrectPropertiesTest,
::testing::Combine(::testing::Values(CommonTestUtils::DEVICE_AUTO, CommonTestUtils::DEVICE_MULTI),
::testing::ValuesIn(default_incorrect_properties)));
INSTANTIATE_TEST_SUITE_P(smoke_Multi_OVClassLoadNetworkWithDefaultIncorrectPropertiesTest,
OVClassLoadNetworkWithDefaultIncorrectPropertiesTest,
::testing::Combine(::testing::Values(CommonTestUtils::DEVICE_MULTI),
::testing::ValuesIn(default_multi_incorrect_properties)));
::testing::ValuesIn(auto_multi_default_properties)));
// For AUTO, User sets perf_hint, AUTO's perf_hint should not return default value LATENCY
const std::vector<ov::AnyMap> default_auto_properties = {
{ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)},
{ov::hint::performance_mode(ov::hint::PerformanceMode::UNDEFINED)}};
// For MULTI, User sets perf_hint or Affinity or num_streams or infer_num_threads, MULTI's perf_hint should
// not return default value THROUGHPUT
// For Secondary property test about default hint is in auto_load_network_properties_test.cpp
const std::vector<ov::AnyMap> default_multi_properties = {
{ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)},
{ov::hint::performance_mode(ov::hint::PerformanceMode::UNDEFINED)},
{ov::affinity(ov::Affinity::NONE)},
{ov::num_streams(ov::streams::AUTO)},
{ov::inference_num_threads(1)}};
INSTANTIATE_TEST_SUITE_P(smoke_AUTO_DoNotReturnDefaultHintTest,
OVClassLoadNetWorkDoNotReturnDefaultHintTest,
::testing::Combine(::testing::Values("AUTO:CPU"),
::testing::ValuesIn(default_auto_properties)));
INSTANTIATE_TEST_SUITE_P(smoke_MULTI_DoNotReturnDefaultHintTest,
OVClassLoadNetWorkDoNotReturnDefaultHintTest,
::testing::Combine(::testing::Values("MULTI:CPU"),
::testing::ValuesIn(default_multi_properties)));
} // namespace

View File

@ -124,8 +124,8 @@ using OVClassSetLogLevelConfigTest = OVClassBaseTestP;
using OVClassSpecificDeviceTestSetConfig = OVClassBaseTestP;
using OVClassSpecificDeviceTestGetConfig = OVClassBaseTestP;
using OVClassLoadNetworkWithCorrectPropertiesTest = OVClassSetDevicePriorityConfigTest;
using OVClassLoadNetworkWithDefaultPropertiesTest = OVClassSetDevicePriorityConfigTest;
using OVClassLoadNetworkWithDefaultIncorrectPropertiesTest = OVClassSetDevicePriorityConfigTest;
using OVClassLoadNetWorkReturnDefaultHintTest = OVClassSetDevicePriorityConfigTest;
using OVClassLoadNetWorkDoNotReturnDefaultHintTest = OVClassSetDevicePriorityConfigTest;
using OVClassLoadNetworkAndCheckSecondaryPropertiesTest = OVClassSetDevicePriorityConfigTest;
class OVClassSeveralDevicesTest : public OVPluginTestBase,
@ -1104,22 +1104,30 @@ TEST_P(OVClassLoadNetworkAndCheckSecondaryPropertiesTest, LoadNetworkAndCheckSec
ASSERT_EQ(actual, expect);
}
TEST_P(OVClassLoadNetworkWithDefaultPropertiesTest, LoadNetworkWithDefaultPropertiesTest) {
TEST_P(OVClassLoadNetWorkReturnDefaultHintTest, LoadNetworkReturnDefaultHintTest) {
ov::Core ie = createCoreWithTemplate();
ov::CompiledModel model;
ov::hint::PerformanceMode value;
OV_ASSERT_NO_THROW(model = ie.compile_model(actualNetwork, target_device, configuration));
ov::hint::PerformanceMode value = ov::hint::PerformanceMode::UNDEFINED;
OV_ASSERT_NO_THROW(value = model.get_property(ov::hint::performance_mode));
ASSERT_EQ(value, ov::hint::PerformanceMode::THROUGHPUT);
if (target_device.find("AUTO") != std::string::npos) {
ASSERT_EQ(value, ov::hint::PerformanceMode::LATENCY);
} else {
ASSERT_EQ(value, ov::hint::PerformanceMode::THROUGHPUT);
}
}
TEST_P(OVClassLoadNetworkWithDefaultIncorrectPropertiesTest, LoadNetworkWithDefaultIncorrectPropertiesTest) {
TEST_P(OVClassLoadNetWorkDoNotReturnDefaultHintTest, LoadNetworkDoNotReturnDefaultHintTest) {
ov::Core ie = createCoreWithTemplate();
ov::CompiledModel model;
ov::hint::PerformanceMode value;
OV_ASSERT_NO_THROW(model = ie.compile_model(actualNetwork, target_device, configuration));
ov::hint::PerformanceMode value = ov::hint::PerformanceMode::THROUGHPUT;
OV_ASSERT_NO_THROW(value = model.get_property(ov::hint::performance_mode));
ASSERT_NE(value, ov::hint::PerformanceMode::THROUGHPUT);
if (target_device.find("AUTO") != std::string::npos) {
ASSERT_NE(value, ov::hint::PerformanceMode::LATENCY);
} else {
ASSERT_NE(value, ov::hint::PerformanceMode::THROUGHPUT);
}
}
TEST_P(OVClassLoadNetworkTest, LoadNetworkWithInvalidDeviceIDThrows) {

View File

@ -0,0 +1,529 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <common_test_utils/test_constants.hpp>
#include <ie_core.hpp>
#include <ie_metric_helpers.hpp>
#include <multi-device/multi_device_config.hpp>
#include <ngraph_functions/subgraph_builders.hpp>
#include <openvino/runtime/core.hpp>
#include "cpp/ie_plugin.hpp"
#include "mock_common.hpp"
#include "plugin/mock_load_network_properties.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp"
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_ivariable_state_internal.hpp"
#include "unit_test_utils/mocks/mock_iinfer_request.hpp"
using ::testing::_;
using ::testing::MatcherCast;
using ::testing::Matches;
using ::testing::NiceMock;
using ::testing::Return;
using ::testing::ReturnRef;
using ::testing::StrEq;
using ::testing::Throw;
using Config = std::map<std::string, std::string>;
// Custom gmock matcher: succeeds when the config map's PERFORMANCE_HINT entry
// equals the expected hint. A map with no PERFORMANCE_HINT entry matches the
// sentinel string "No PERFORMANCE_HINT".
MATCHER_P(ComparePerfHint, perfHint, "Check if perf hint expects.") {
    const auto entry = arg.find(PluginConfigParams::KEY_PERFORMANCE_HINT);
    const std::string actualHint = (entry == arg.end()) ? "No PERFORMANCE_HINT" : entry->second;
    return perfHint == actualHint;
}
using namespace MockMultiDevice;

// One test case: which virtual plugin to load through, which HW plugins the
// network is expected to land on, and the user-supplied configuration map.
using ConfigParams = std::tuple<std::string,              // virtual device name to load network
                                std::vector<std::string>, // hardware device name to expect loading network on
                                Config>;                  // secondary property setting to device
// Shared storage reused by the Create*TestConfigs() helpers below.
static std::vector<ConfigParams> testConfigs;
// Fixture for the AUTO/MULTI default performance-hint mock tests.
// It wires a mocked ICore exposing "CPU" and "GPU" devices into the
// MULTI/AUTO plugin and lets the TEST_P bodies verify (via the
// ComparePerfHint matcher on ICore::LoadNetwork) which PERFORMANCE_HINT
// value is forwarded to each hardware plugin.
class AutoDefaultPerfHintTest : public ::testing::TestWithParam<ConfigParams> {
public:
    std::shared_ptr<NiceMock<MockICore>> core;
    std::shared_ptr<NiceMock<MockMultiPluginForLoadNetworkWithPropertiesTest>> plugin;
    InferenceEngine::CNNNetwork simpleCnnNetwork;
    // mock cpu exeNetwork
    std::shared_ptr<NiceMock<MockIExecutableNetworkInternal>> cpuMockIExeNet;
    ov::SoPtr<IExecutableNetworkInternal> cpuMockExeNetwork;
    NiceMock<MockIInferencePlugin>* cpuMockIPlugin;
    InferenceEngine::InferencePlugin cpuMockPlugin;
    // mock gpu exeNetwork
    std::shared_ptr<NiceMock<MockIExecutableNetworkInternal>> gpuMockIExeNet;
    ov::SoPtr<IExecutableNetworkInternal> gpuMockExeNetwork;
    NiceMock<MockIInferencePlugin>* gpuMockIPlugin;
    InferenceEngine::InferencePlugin gpuMockPlugin;
    std::shared_ptr<NiceMock<MockIInferRequestInternal>> inferReqInternal;

public:
    // Builds a human-readable test name from the virtual device, the expected
    // target devices and the per-device properties of the parameter set.
    static std::string getTestCaseName(testing::TestParamInfo<ConfigParams> obj) {
        std::string deviceName;
        std::vector<std::string> targetDevices;
        Config deviceConfigs;
        std::tie(deviceName, targetDevices, deviceConfigs) = obj.param;
        std::ostringstream result;
        result << deviceName;
        result << "_loadnetwork_to_";
        for (auto& device : targetDevices) {
            result << device << "_";
        }
        auto cpuConfig = deviceConfigs.find("CPU");
        auto gpuConfig = deviceConfigs.find("GPU");
        auto priority = deviceConfigs.find("MULTI_DEVICE_PRIORITIES");
        result << "properties_";
        if (cpuConfig != deviceConfigs.end())
            result << "CPU_" << cpuConfig->second << "_";
        if (gpuConfig != deviceConfigs.end())
            result << "GPU_" << gpuConfig->second << "_";
        if (priority != deviceConfigs.end())
            result << "priority_" << priority->second;
        return result.str();
    }

    // Cases mixing user-set NUM_STREAMS with the default performance hint:
    // a device with NUM_STREAMS set must not receive a default hint.
    static std::vector<ConfigParams> CreateNumStreamsAndDefaultPerfHintTestConfigs() {
        testConfigs.clear();
        testConfigs.push_back(
            ConfigParams{"AUTO", {"CPU"}, {{"MULTI_DEVICE_PRIORITIES", "CPU"}}});  // CPU: get default_hint:latency
        testConfigs.push_back(
            ConfigParams{"AUTO",
                         {"CPU"},
                         {{"CPU", "NUM_STREAMS 3"}, {"MULTI_DEVICE_PRIORITIES", "CPU"}}});  // CPU: no perf_hint
        testConfigs.push_back(
            ConfigParams{"AUTO",
                         {"CPU", "GPU"},
                         {{"MULTI_DEVICE_PRIORITIES",
                           "GPU,CPU"}}});  // CPU: as helper, get default_hint:latency GPU:get default_hint:latency
        testConfigs.push_back(ConfigParams{
            "AUTO",
            {"CPU", "GPU"},
            {{"CPU", "NUM_STREAMS 3"},
             {"MULTI_DEVICE_PRIORITIES",
              "GPU,CPU"}}});  // CPU: as helper, get default_hint:latency GPU:get default_hint:latency
        testConfigs.push_back(ConfigParams{
            "AUTO",
            {"CPU", "GPU"},
            {{"GPU", "NUM_STREAMS 3"},
             {"MULTI_DEVICE_PRIORITIES", "GPU,CPU"}}});  // CPU: as helper, get default_hint:latency GPU:no perf_hint
        testConfigs.push_back(
            ConfigParams{"AUTO",
                         {"CPU"},
                         {{"CPU", "NUM_STREAMS 5"}, {"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}});  // CPU: no perf_hint
        testConfigs.push_back(
            ConfigParams{"AUTO", {"GPU"}, {{"MULTI_DEVICE_PRIORITIES", "GPU"}}});  // GPU: get default_hint:latency
        testConfigs.push_back(
            ConfigParams{"AUTO",
                         {"GPU"},
                         {{"GPU", "NUM_STREAMS 3"}, {"MULTI_DEVICE_PRIORITIES", "GPU"}}});  // GPU: no perf_hint
        testConfigs.push_back(ConfigParams{
            "MULTI:CPU,GPU",
            {"CPU", "GPU"},
            {{"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}});  // CPU: get default_hint:tput GPU: get default_hint:tput
        testConfigs.push_back(
            ConfigParams{"MULTI:CPU,GPU",
                         {"CPU", "GPU"},
                         {{"CPU", "NUM_STREAMS 3"},
                          {"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}});  // CPU: no perf_hint GPU: get default_hint:tput
        testConfigs.push_back(
            ConfigParams{"MULTI:CPU,GPU",
                         {"CPU", "GPU"},
                         {{"GPU", "NUM_STREAMS 3"},
                          {"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}});  // CPU: get default_hint:tput GPU: no perf_hint
        testConfigs.push_back(
            ConfigParams{"MULTI:CPU,GPU",
                         {"CPU", "GPU"},
                         {{"CPU", "NUM_STREAMS 3"},
                          {"GPU", "NUM_STREAMS 3"},
                          {"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}});  // CPU: no perf_hint GPU: no perf_hint
        return testConfigs;
    }

    // Cases mixing an explicit PERFORMANCE_HINT with the default hint:
    // the user hint wins, except for the AUTO CPU helper which keeps LATENCY.
    static std::vector<ConfigParams> CreatePerfHintAndDefaultPerfHintTestConfigs() {
        testConfigs.clear();
        testConfigs.push_back(ConfigParams{
            "AUTO",
            {"CPU"},
            {{"CPU", "PERFORMANCE_HINT THROUGHPUT"}, {"MULTI_DEVICE_PRIORITIES", "CPU"}}});  // CPU: get perf_hint:tput
        testConfigs.push_back(
            ConfigParams{"AUTO",
                         {"CPU", "GPU"},
                         {{"CPU", "PERFORMANCE_HINT THROUGHPUT"},
                          {"MULTI_DEVICE_PRIORITIES",
                           "GPU,CPU"}}});  // CPU: as helper, get perf_hint:latency GPU:get default_hint:latency
        testConfigs.push_back(
            ConfigParams{"AUTO",
                         {"CPU", "GPU"},
                         {{"CPU", "PERFORMANCE_HINT THROUGHPUT"},
                          {"GPU", "PERFORMANCE_HINT THROUGHPUT"},
                          {"MULTI_DEVICE_PRIORITIES",
                           "GPU,CPU"}}});  // CPU: as helper, get perf_hint:latency GPU:get perf_hint:tput
        testConfigs.push_back(ConfigParams{"AUTO",
                                           {"CPU"},
                                           {{"CPU", "PERFORMANCE_HINT THROUGHPUT"},
                                            {"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}});  // CPU: get perf_hint:tput
        testConfigs.push_back(ConfigParams{
            "AUTO",
            {"GPU"},
            {{"GPU", "PERFORMANCE_HINT THROUGHPUT"}, {"MULTI_DEVICE_PRIORITIES", "GPU"}}});  // GPU: get perf_hint:tput
        testConfigs.push_back(ConfigParams{
            "MULTI:CPU,GPU",
            {"CPU", "GPU"},
            {{"CPU", "PERFORMANCE_HINT LATENCY"},
             {"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}});  // CPU: get perf_hint:latency GPU: get default_hint:tput
        testConfigs.push_back(ConfigParams{
            "MULTI:CPU,GPU",
            {"CPU", "GPU"},
            {{"GPU", "PERFORMANCE_HINT LATENCY"},
             {"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}});  // CPU: get default_hint:tput GPU: get perf_hint:latency
        testConfigs.push_back(ConfigParams{
            "MULTI:CPU,GPU",
            {"CPU", "GPU"},
            {{"CPU", "PERFORMANCE_HINT LATENCY"},
             {"GPU", "PERFORMANCE_HINT LATENCY"},
             {"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}});  // CPU: get perf_hint:latency GPU: get perf_hint:latency
        return testConfigs;
    }

    // Cases mixing secondary (device) properties with the default hint: any
    // device that has its own property section must not receive a default hint.
    static std::vector<ConfigParams> CreateSecPropAndDefaultPerfHintTestConfigs() {
        testConfigs.clear();
        testConfigs.push_back(ConfigParams{
            "AUTO",
            {"CPU"},
            {{"CPU", "ALLOW_AUTO_BATCHING TRUE"}, {"MULTI_DEVICE_PRIORITIES", "CPU"}}});  // CPU: no perf_hint
        testConfigs.push_back(
            ConfigParams{"AUTO",
                         {"CPU", "GPU"},
                         {{"CPU", "ALLOW_AUTO_BATCHING TRUE"},
                          {"MULTI_DEVICE_PRIORITIES",
                           "GPU,CPU"}}});  // CPU: as helper, get perf_hint:latency GPU:get default_hint:latency
        testConfigs.push_back(
            ConfigParams{"AUTO",
                         {"CPU", "GPU"},
                         {{"CPU", "ALLOW_AUTO_BATCHING TRUE"},
                          {"GPU", "ALLOW_AUTO_BATCHING TRUE"},
                          {"MULTI_DEVICE_PRIORITIES",
                           "GPU,CPU"}}});  // CPU: as helper, get perf_hint:latency GPU:no perf_hint
        testConfigs.push_back(ConfigParams{"AUTO",
                                           {"CPU"},
                                           {{"CPU", "ALLOW_AUTO_BATCHING FALSE"},
                                            {"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}});  // CPU: no perf_hint
        testConfigs.push_back(ConfigParams{
            "AUTO",
            {"GPU"},
            {{"GPU", "ALLOW_AUTO_BATCHING FALSE"}, {"MULTI_DEVICE_PRIORITIES", "GPU"}}});  // GPU: no perf_hint
        testConfigs.push_back(ConfigParams{
            "MULTI:CPU,GPU",
            {"CPU", "GPU"},
            {{"CPU", "ALLOW_AUTO_BATCHING FALSE"},
             {"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}});  // CPU: no perf_hint GPU: get default_hint:tput
        testConfigs.push_back(ConfigParams{
            "MULTI:CPU,GPU",
            {"CPU", "GPU"},
            {{"GPU", "ALLOW_AUTO_BATCHING FALSE"},
             {"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}});  // CPU: get default_hint:tput GPU: no perf_hint
        testConfigs.push_back(ConfigParams{
            "MULTI:CPU,GPU",
            {"CPU", "GPU"},
            {{"CPU", "ALLOW_AUTO_BATCHING TRUE"},
             {"GPU", "ALLOW_AUTO_BATCHING FALSE"},
             {"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}});  // CPU: no perf_hint GPU: no perf_hint
        return testConfigs;
    }

    void TearDown() override {
        core.reset();
        plugin.reset();
    }

    // Builds the mock CPU/GPU executable networks, the mock ICore and the
    // MULTI/MOCK plugin under test, and installs the default ON_CALL behavior
    // so LoadExeNetworkImpl() can run end-to-end without real devices.
    void SetUp() override {
        // prepare cpuMockExeNetwork
        cpuMockIExeNet = std::make_shared<NiceMock<MockIExecutableNetworkInternal>>();
        auto cpuMockIPluginPtr = std::make_shared<NiceMock<MockIInferencePlugin>>();
        ON_CALL(*cpuMockIPluginPtr, LoadNetwork(MatcherCast<const CNNNetwork&>(_), _))
            .WillByDefault(Return(cpuMockIExeNet));
        cpuMockPlugin = InferenceEngine::InferencePlugin{cpuMockIPluginPtr, {}};
        // remove annoying ON CALL message
        EXPECT_CALL(*cpuMockIPluginPtr, LoadNetwork(MatcherCast<const CNNNetwork&>(_), _)).Times(1);
        cpuMockExeNetwork = cpuMockPlugin.LoadNetwork(CNNNetwork{}, {});

        // prepare gpuMockExeNetwork
        gpuMockIExeNet = std::make_shared<NiceMock<MockIExecutableNetworkInternal>>();
        auto gpuMockIPluginPtr = std::make_shared<NiceMock<MockIInferencePlugin>>();
        ON_CALL(*gpuMockIPluginPtr, LoadNetwork(MatcherCast<const CNNNetwork&>(_), _))
            .WillByDefault(Return(gpuMockIExeNet));
        gpuMockPlugin = InferenceEngine::InferencePlugin{gpuMockIPluginPtr, {}};
        // remove annoying ON CALL message
        EXPECT_CALL(*gpuMockIPluginPtr, LoadNetwork(MatcherCast<const CNNNetwork&>(_), _)).Times(1);
        gpuMockExeNetwork = gpuMockPlugin.LoadNetwork(CNNNetwork{}, {});

        // prepare mockicore and cnnNetwork for loading
        core = std::shared_ptr<NiceMock<MockICore>>(new NiceMock<MockICore>());
        auto* origin_plugin = new NiceMock<MockMultiPluginForLoadNetworkWithPropertiesTest>();
        plugin = std::shared_ptr<NiceMock<MockMultiPluginForLoadNetworkWithPropertiesTest>>(origin_plugin);
        // replace core with mock Icore
        plugin->SetCore(core);
        inferReqInternal = std::make_shared<NiceMock<MockIInferRequestInternal>>();
        ON_CALL(*cpuMockIExeNet.get(), CreateInferRequest()).WillByDefault(Return(inferReqInternal));
        ON_CALL(*gpuMockIExeNet.get(), CreateInferRequest()).WillByDefault(Return(inferReqInternal));
        ON_CALL(*cpuMockIExeNet.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
            .WillByDefault(Return("0"));
        ON_CALL(*gpuMockIExeNet.get(), GetMetric(StrEq(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))))
            .WillByDefault(Return("0"));
        std::vector<std::string> availableDevs = {"CPU", "GPU"};
        ON_CALL(*core, GetAvailableDevices()).WillByDefault(Return(availableDevs));
        std::vector<std::string> metrics = {METRIC_KEY(SUPPORTED_CONFIG_KEYS)};
        ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_METRICS)), _)).WillByDefault(Return(metrics));
        std::vector<std::string> configKeys = {"SUPPORTED_CONFIG_KEYS", "NUM_STREAMS"};
        ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)).WillByDefault(Return(configKeys));
        // delegate parsing/selection to the real MULTI plugin implementation
        ON_CALL(*plugin, ParseMetaDevices)
            .WillByDefault(
                [this](const std::string& priorityDevices, const std::map<std::string, std::string>& config) {
                    return plugin->MultiDeviceInferencePlugin::ParseMetaDevices(priorityDevices, config);
                });
        ON_CALL(*plugin, SelectDevice)
            .WillByDefault([this](const std::vector<DeviceInformation>& metaDevices,
                                  const std::string& netPrecision,
                                  unsigned int priority) {
                return plugin->MultiDeviceInferencePlugin::SelectDevice(metaDevices, netPrecision, priority);
            });
        ON_CALL(*plugin, GetValidDevice)
            .WillByDefault([this](const std::vector<DeviceInformation>& metaDevices, const std::string& netPrecision) {
                std::list<DeviceInformation> devices(metaDevices.begin(), metaDevices.end());
                return devices;
            });
        // GetSupportedConfig: parse the device's own property string, if present
        ON_CALL(*core, GetSupportedConfig)
            .WillByDefault([](const std::string& device, const std::map<std::string, std::string>& fullConfigs) {
                auto item = fullConfigs.find(device);
                Config deviceConfigs;
                if (item != fullConfigs.end()) {
                    std::stringstream strConfigs(item->second);
                    ov::util::Read<Config>{}(strConfigs, deviceConfigs);
                }
                return deviceConfigs;
            });
        ON_CALL(*plugin, GetDeviceList).WillByDefault([this](const std::map<std::string, std::string>& config) {
            return plugin->MultiDeviceInferencePlugin::GetDeviceList(config);
        });
        std::vector<std::string> cpuCapability{"FP32", "FP16", "INT8", "BIN"};
        std::vector<std::string> gpuCapability{"FP32", "FP16", "BATCHED_BLOB", "BIN", "INT8"};
        ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_CPU), StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _))
            .WillByDefault(Return(cpuCapability));
        ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_GPU), StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _))
            .WillByDefault(Return(gpuCapability));
        ON_CALL(*core,
                LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                            ::testing::Matcher<const std::string&>(StrEq("CPU")),
                            ::testing::Matcher<const std::map<std::string, std::string>&>(_)))
            .WillByDefault(Return(cpuMockExeNetwork));
        ON_CALL(*core,
                LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                            ::testing::Matcher<const std::string&>(StrEq("GPU")),
                            ::testing::Matcher<const std::map<std::string, std::string>&>(_)))
            .WillByDefault(Return(gpuMockExeNetwork));
        std::shared_ptr<ngraph::Function> simpleNetwork = ngraph::builder::subgraph::makeSingleConv();
        ASSERT_NO_THROW(simpleCnnNetwork = InferenceEngine::CNNNetwork(simpleNetwork));
    }
};
// Aliases so each scenario group gets its own parameterized test-suite name.
using NumStreamsAndDefaultPerfHintMockTest = AutoDefaultPerfHintTest;
using PerHintAndDefaultPerfHintMockTest = AutoDefaultPerfHintTest;
using SecPropAndDefaultPerfHintMockTest = AutoDefaultPerfHintTest;
// Verifies that a device with user-set NUM_STREAMS receives no default
// PERFORMANCE_HINT from AUTO/MULTI, while the AUTO CPU helper always keeps
// LATENCY and untouched devices get the mode's default hint.
TEST_P(NumStreamsAndDefaultPerfHintMockTest, NumStreamsAndDefaultPerfHintTest) {
    std::string virtualDevice;
    std::vector<std::string> hardwareDevices;
    Config fullConfig;
    std::tie(virtualDevice, hardwareDevices, fullConfig) = this->GetParam();

    const bool isAutoMode = virtualDevice.find("AUTO") != std::string::npos;
    if (isAutoMode)
        plugin->SetName("AUTO");
    if (virtualDevice.find("MULTI") != std::string::npos)
        plugin->SetName("MULTI");

    for (auto& hwDevice : hardwareDevices) {
        // In AUTO mode with priority "GPU,CPU", CPU is loaded as the accelerating helper.
        bool actsAsCpuHelper = false;
        if (isAutoMode && hwDevice.find("CPU") != std::string::npos) {
            const auto priorities = fullConfig.find("MULTI_DEVICE_PRIORITIES");
            actsAsCpuHelper =
                priorities != fullConfig.end() && priorities->second.find("GPU,CPU") != std::string::npos;
        }

        // Helper CPU always keeps LATENCY; otherwise the default hint is
        // LATENCY for AUTO and THROUGHPUT for MULTI.
        std::string expectedHint = (actsAsCpuHelper || isAutoMode) ? "LATENCY" : "THROUGHPUT";

        // Extract this device's secondary properties, if any were supplied.
        Config hwConfig;
        const auto secondary = fullConfig.find(hwDevice);
        if (secondary != fullConfig.end()) {
            std::stringstream rawProps(secondary->second);
            ov::util::Read<Config>{}(rawProps, hwConfig);
        }

        const bool hasNumStreams = hwConfig.find(ov::num_streams.name()) != hwConfig.end();
        if (hasNumStreams && !actsAsCpuHelper) {
            // User-set num_streams suppresses the default hint entirely.
            expectedHint = "No PERFORMANCE_HINT";
        }

        EXPECT_CALL(
            *core,
            LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                        ::testing::Matcher<const std::string&>(hwDevice),
                        ::testing::Matcher<const std::map<std::string, std::string>&>(ComparePerfHint(expectedHint))))
            .Times(1);
    }

    ASSERT_NO_THROW(plugin->LoadExeNetworkImpl(simpleCnnNetwork, fullConfig));
}
// Instantiate the NUM_STREAMS-vs-default-hint scenarios for AUTO and MULTI.
INSTANTIATE_TEST_SUITE_P(smoke_AutoMultiMock_NumStreamsAndDefaultPerfHintToHWTest,
                         NumStreamsAndDefaultPerfHintMockTest,
                         ::testing::ValuesIn(NumStreamsAndDefaultPerfHintMockTest::CreateNumStreamsAndDefaultPerfHintTestConfigs()),
                         NumStreamsAndDefaultPerfHintMockTest::getTestCaseName);
// Verifies that an explicit PERFORMANCE_HINT in a device's secondary
// properties overrides the AUTO/MULTI default hint forwarded to that
// hardware plugin; the AUTO CPU helper still always keeps LATENCY.
TEST_P(PerHintAndDefaultPerfHintMockTest, PerfHintAndDefaultPerfHintTest) {
    std::string device;
    std::vector<std::string> targetDevices;
    Config config;
    bool bIsAuto = false;
    std::tie(device, targetDevices, config) = this->GetParam();
    if (device.find("AUTO") != std::string::npos) {
        bIsAuto = true;
        plugin->SetName("AUTO");
    }
    if (device.find("MULTI") != std::string::npos)
        plugin->SetName("MULTI");

    for (auto& deviceName : targetDevices) {
        bool isCPUHelper = false;
        if (deviceName.find("CPU") != std::string::npos && bIsAuto) {
            auto item = config.find("MULTI_DEVICE_PRIORITIES");
            if (item != config.end() && item->second.find("GPU,CPU") != std::string::npos) {
                // GPU is the selected device, so CPU acts as the accelerating helper.
                isCPUHelper = true;
            }
        }

        std::string HW_PerfHint;
        if (isCPUHelper) {
            // if it is CPU Helper, CPU should keep perf hint to LATENCY.
            HW_PerfHint = "LATENCY";
        } else {
            // HW default perf_hint: LATENCY for AUTO, THROUGHPUT for MULTI
            HW_PerfHint = bIsAuto ? "LATENCY" : "THROUGHPUT";
        }

        auto item = config.find(deviceName);
        Config deviceConfigs;
        if (item != config.end()) {
            std::stringstream strConfigs(item->second);
            // Parse the device properties to common property into deviceConfigs.
            ov::util::Read<Config>{}(strConfigs, deviceConfigs);
        }
        auto itor = deviceConfigs.find(PluginConfigParams::KEY_PERFORMANCE_HINT);
        if (itor != deviceConfigs.end() && !isCPUHelper) {
            // The user-specified hint overrides the default one.
            HW_PerfHint = itor->second;
        }

        EXPECT_CALL(
            *core,
            LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                        ::testing::Matcher<const std::string&>(deviceName),
                        ::testing::Matcher<const std::map<std::string, std::string>&>(ComparePerfHint(HW_PerfHint))))
            .Times(1);
    }

    ASSERT_NO_THROW(plugin->LoadExeNetworkImpl(simpleCnnNetwork, config));
}
// Instantiate the explicit-PERFORMANCE_HINT-vs-default-hint scenarios.
INSTANTIATE_TEST_SUITE_P(smoke_AutoMultiMock_PerHintAndDefaultPerfHintToHWTest,
                         PerHintAndDefaultPerfHintMockTest,
                         ::testing::ValuesIn(PerHintAndDefaultPerfHintMockTest::CreatePerfHintAndDefaultPerfHintTestConfigs()),
                         PerHintAndDefaultPerfHintMockTest::getTestCaseName);
// Verifies that a device with its own secondary-property section receives no
// default PERFORMANCE_HINT from AUTO/MULTI; the AUTO CPU helper still always
// keeps LATENCY.
TEST_P(SecPropAndDefaultPerfHintMockTest, SecPropAndDefaultPerfHintTest) {
    std::string device;
    std::vector<std::string> targetDevices;
    Config config;
    bool bIsAuto = false;
    std::tie(device, targetDevices, config) = this->GetParam();
    if (device.find("AUTO") != std::string::npos) {
        bIsAuto = true;
        plugin->SetName("AUTO");
    }
    if (device.find("MULTI") != std::string::npos)
        plugin->SetName("MULTI");

    for (auto& deviceName : targetDevices) {
        bool isCPUHelper = false;
        if (deviceName.find("CPU") != std::string::npos && bIsAuto) {
            auto item = config.find("MULTI_DEVICE_PRIORITIES");
            if (item != config.end() && item->second.find("GPU,CPU") != std::string::npos) {
                // GPU is the selected device, so CPU acts as the accelerating helper.
                isCPUHelper = true;
            }
        }

        std::string HW_PerfHint;
        if (isCPUHelper) {
            // if it is CPU Helper, CPU should keep perf hint to LATENCY.
            HW_PerfHint = "LATENCY";
        } else {
            // HW default perf_hint: LATENCY for AUTO, THROUGHPUT for MULTI
            HW_PerfHint = bIsAuto ? "LATENCY" : "THROUGHPUT";
        }

        auto item = config.find(deviceName);
        if (item != config.end() && !isCPUHelper) {
            // do not pass default perf_hint to HW
            HW_PerfHint = "No PERFORMANCE_HINT";
        }

        EXPECT_CALL(
            *core,
            LoadNetwork(::testing::Matcher<const InferenceEngine::CNNNetwork&>(_),
                        ::testing::Matcher<const std::string&>(deviceName),
                        ::testing::Matcher<const std::map<std::string, std::string>&>(ComparePerfHint(HW_PerfHint))))
            .Times(1);
    }

    ASSERT_NO_THROW(plugin->LoadExeNetworkImpl(simpleCnnNetwork, config));
}
// Instantiate the secondary-properties-vs-default-hint scenarios.
INSTANTIATE_TEST_SUITE_P(smoke_AutoMultiMock_SecPropAndDefaultPerfHintToHWTest,
                         SecPropAndDefaultPerfHintMockTest,
                         ::testing::ValuesIn(SecPropAndDefaultPerfHintMockTest::CreateSecPropAndDefaultPerfHintTestConfigs()),
                         SecPropAndDefaultPerfHintMockTest::getTestCaseName);