[AUTO] Enable round robin policy for cumulative throughput mode of AUTO plugin (#20439)

* Add and implement the logic of property SCHEDULE_POLICY for MULTI plugin. * Updated. * Enable test case for schedule policy test. * enable test case for property ov::intel_auto::schedule_policy. * Update. * Updated. * Updated. * Update. * Update the lock logic here by considering the runtime fallback case. * Update. * Update. * Update. * Update default value of schedule policy to DEVICE_PRIORITY * Enable the function test case for schedule policy. * Add description for inference requests schedule policy within AUTO plugin cumulative mode. * Updated. * Python bindings for enum SchedulePolicy and property ov::intel_auto::schedule_policy. * Update. * Update. * Update. * Updated. --------- Co-authored-by: Chen Peter <peter.chen@intel.com> Co-authored-by: Wanglei Shen <wanglei.shen@intel.com>
2023-12-07 14:04:56 +08:00
parent 33b2e6bb51
commit fd0809ead4
17 changed files with 372 additions and 14 deletions
--- a/docs/OV_Runtime_UG/auto_device_selection.rst
+++ b/docs/OV_Runtime_UG/auto_device_selection.rst
@@ -167,6 +167,17 @@ Following the OpenVINO™ naming convention, the Automatic Device Selection mode
 |                                              |                                                                    |
 |                                              | The default value is ``true``.                                     |
 +----------------------------------------------+--------------------------------------------------------------------+
+| ``ov::intel_auto::schedule_policy``          | **Values**:                                                        |
+|                                              |                                                                    |
+|                                              | ``ROUND_ROBIN``                                                    |
+|                                              |                                                                    |
+|                                              | ``DEVICE_PRIORITY``                                                |
+|                                              |                                                                    |
+|                                              | Specify the schedule policy of infer requests assigned to hardware |
+|                                              | plugins for AUTO cumulative mode (MULTI).                          |
+|                                              |                                                                    |
+|                                              | The default value is ``DEVICE_PRIORITY``.                          |
+----------------------------------------------+--------------------------------------------------------------------+

 Inference with AUTO is configured similarly to when device plugins are used:
 you compile the model on the plugin with configuration and execute inference.
--- a/docs/snippets/ov_auto.py
+++ b/docs/snippets/ov_auto.py
@@ -8,6 +8,7 @@ import openvino.properties as properties
 import openvino.properties.device as device
 import openvino.properties.hint as hints
 import openvino.properties.streams as streams
+import openvino.properties.intel_auto as intel_auto
 #! [py_ov_property_import_header]
 import openvino.properties.log as log

@@ -96,11 +97,13 @@ def part3():
        },
    )
    # To use the “CUMULATIVE_THROUGHPUT” mode:
+    # To use the ROUND_ROBIN schedule policy:
    compiled_model = core.compile_model(
        model=model,
        device_name="AUTO",
        config={
-            hints.performance_mode: hints.PerformanceMode.CUMULATIVE_THROUGHPUT
+            hints.performance_mode: hints.PerformanceMode.CUMULATIVE_THROUGHPUT,
+            intel_auto.schedule_policy: intel_auto.SchedulePolicy.ROUND_ROBIN
        },
    )
    #! [part3]
--- a/src/bindings/python/src/openvino/properties/intel_auto/__init__.py
+++ b/src/bindings/python/src/openvino/properties/intel_auto/__init__.py
@@ -2,7 +2,11 @@
 # Copyright (C) 2018-2023 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

+# Enums
+from openvino._pyopenvino.properties.intel_auto import SchedulePolicy
+
 # Properties
 import openvino._pyopenvino.properties.intel_auto as __intel_auto
 from openvino.properties._properties import __make_properties
+
 __make_properties(__intel_auto, __name__)
--- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
+++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
@@ -282,8 +282,14 @@ void regmodule_properties(py::module m) {
    py::module m_intel_auto =
        m_properties.def_submodule("intel_auto",
                                   "openvino.runtime.properties.intel_auto submodule that simulates ov::intel_auto");
+    // Submodule intel_auto - enums
+    py::enum_<ov::intel_auto::SchedulePolicy>(m_intel_auto, "SchedulePolicy", py::arithmetic())
+        .value("ROUND_ROBIN", ov::intel_auto::SchedulePolicy::ROUND_ROBIN)
+        .value("DEVICE_PRIORITY", ov::intel_auto::SchedulePolicy::DEVICE_PRIORITY)
+        .value("DEFAULT", ov::intel_auto::SchedulePolicy::DEFAULT);

    wrap_property_RW(m_intel_auto, ov::intel_auto::device_bind_buffer, "device_bind_buffer");
    wrap_property_RW(m_intel_auto, ov::intel_auto::enable_startup_fallback, "enable_startup_fallback");
    wrap_property_RW(m_intel_auto, ov::intel_auto::enable_runtime_fallback, "enable_runtime_fallback");
+    wrap_property_RW(m_intel_auto, ov::intel_auto::schedule_policy, "schedule_policy");
 }
--- a/src/bindings/python/src/pyopenvino/utils/utils.cpp
+++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp
@@ -172,6 +172,8 @@ py::object from_ov_any(const ov::Any& any) {
        return py::cast(any.as<ov::hint::Priority>());
    } else if (any.is<ov::hint::PerformanceMode>()) {
        return py::cast(any.as<ov::hint::PerformanceMode>());
+    } else if (any.is<ov::intel_auto::SchedulePolicy>()) {
+        return py::cast(any.as<ov::intel_auto::SchedulePolicy>());
    } else if (any.is<ov::hint::SchedulingCoreType>()) {
        return py::cast(any.as<ov::hint::SchedulingCoreType>());
    } else if (any.is<ov::hint::ExecutionMode>()) {
@@ -357,6 +359,8 @@ ov::Any py_object_to_any(const py::object& py_obj) {
        return py::cast<ov::hint::Priority>(py_obj);
    } else if (py::isinstance<ov::hint::PerformanceMode>(py_obj)) {
        return py::cast<ov::hint::PerformanceMode>(py_obj);
+    } else if (py::isinstance<ov::intel_auto::SchedulePolicy>(py_obj)) {
+        return py::cast<ov::intel_auto::SchedulePolicy>(py_obj);
    } else if (py::isinstance<ov::hint::SchedulingCoreType>(py_obj)) {
        return py::cast<ov::hint::SchedulingCoreType>(py_obj);
    } else if (py::isinstance<ov::log::Level>(py_obj)) {
--- a/src/bindings/python/src/pyopenvino/utils/utils.hpp
+++ b/src/bindings/python/src/pyopenvino/utils/utils.hpp
@@ -20,6 +20,7 @@

 #include "openvino/core/type/element_type.hpp"
 #include "openvino/runtime/properties.hpp"
+#include "openvino/runtime/auto/properties.hpp"
 #include "openvino/pass/serialize.hpp"

 namespace py = pybind11;
--- a/src/bindings/python/tests/test_runtime/test_properties.py
+++ b/src/bindings/python/tests/test_runtime/test_properties.py
@@ -110,6 +110,14 @@ def test_deprecation():
                (log.Level.TRACE, "Level.TRACE", 4),
            ),
        ),
+        (
+            intel_auto.SchedulePolicy,
+            (
+                (intel_auto.SchedulePolicy.ROUND_ROBIN, "SchedulePolicy.ROUND_ROBIN", 0),
+                (intel_auto.SchedulePolicy.DEVICE_PRIORITY, "SchedulePolicy.DEVICE_PRIORITY", 1),
+                (intel_auto.SchedulePolicy.DEFAULT, "SchedulePolicy.DEVICE_PRIORITY", 1),
+            ),
+        ),
    ],
 )
 def test_properties_enums(ov_enum, expected_values):
--- a/src/inference/include/openvino/runtime/auto/properties.hpp
+++ b/src/inference/include/openvino/runtime/auto/properties.hpp
@@ -28,5 +28,51 @@ static constexpr Property<bool> enable_startup_fallback{"ENABLE_STARTUP_FALLBACK
 * selected device
 */
 static constexpr Property<bool> enable_runtime_fallback{"ENABLE_RUNTIME_FALLBACK"};
+
+/**
+ * @brief Enum to define the policy of scheduling inference requests to target devices in cumulative throughput mode
+ * on AUTO
+ * @ingroup ov_runtime_cpp_prop_api
+ */
+enum class SchedulePolicy {
+    ROUND_ROBIN = 0,            //!<  Schedule infer requests across the devices using the round robin policy
+    DEVICE_PRIORITY = 1,        //!<  Schedule infer requests based on the device priority
+    DEFAULT = DEVICE_PRIORITY,  //!<  Default schedule policy is DEVICE_PRIORITY
+};
+
+/** @cond INTERNAL */
+inline std::ostream& operator<<(std::ostream& os, const SchedulePolicy& policy) {
+    switch (policy) {
+    case SchedulePolicy::ROUND_ROBIN:
+        return os << "ROUND_ROBIN";
+    case SchedulePolicy::DEVICE_PRIORITY:
+        return os << "DEVICE_PRIORITY";
+    default:
+        OPENVINO_THROW("Unsupported schedule policy value");
+    }
+}
+
+inline std::istream& operator>>(std::istream& is, SchedulePolicy& policy) {
+    std::string str;
+    is >> str;
+    if (str == "ROUND_ROBIN") {
+        policy = SchedulePolicy::ROUND_ROBIN;
+    } else if (str == "DEVICE_PRIORITY") {
+        policy = SchedulePolicy::DEVICE_PRIORITY;
+    } else if (str == "DEFAULT") {
+        policy = SchedulePolicy::DEFAULT;
+    } else {
+        OPENVINO_THROW("Unsupported schedule policy: ", str);
+    }
+    return is;
+}
+/** @endcond */
+
+/**
+ * @brief Schedule policy of infer requests for the AUTO plugin
+ * Defines which scheduling policy is used in the AUTO CUMULATIVE_THROUGHPUT or MULTI case
+ * @ingroup ov_runtime_cpp_prop_api
+ */
+static constexpr Property<SchedulePolicy> schedule_policy{"SCHEDULE_POLICY"};
 }  // namespace intel_auto
 }  // namespace ov
--- a/src/plugins/auto/src/common.hpp
+++ b/src/plugins/auto/src/common.hpp
@@ -219,6 +219,7 @@ public:
    std::string                                    m_str_devices;
    unsigned int                                   m_model_priority = 0;
    ov::Any                                        m_performance_hint;
+    ov::Any                                        m_schedule_policy = ov::intel_auto::SchedulePolicy::DEFAULT;
    std::mutex                                     m_mutex;
    std::mutex                                     m_fallback_mutex;
    SoCompiledModel                                m_hw_compiled_model;
--- a/src/plugins/auto/src/cumulative_compiled_model.cpp
+++ b/src/plugins/auto/src/cumulative_compiled_model.cpp
@@ -47,7 +47,8 @@ ov::Any AutoCumuCompiledModel::get_property(const std::string& name) const {
                                                    ov::optimal_number_of_infer_requests,
                                                    ov::device::properties,
                                                    ov::hint::model_priority,
-                                                    ov::loaded_from_cache};
+                                                    ov::loaded_from_cache,
+                                                    ov::intel_auto::schedule_policy};
        return ro_properties;
    };
    const auto& default_rw_properties = []() {
@@ -72,6 +73,8 @@ ov::Any AutoCumuCompiledModel::get_property(const std::string& name) const {
        return decltype(ov::supported_properties)::value_type(supported_properties);
    } else if (name == ov::hint::performance_mode) {
        return m_context->m_performance_hint;
+    } else if (name == ov::intel_auto::schedule_policy) {
+        return m_context->m_schedule_policy;
    } else if (name == ov::device::priorities) {
        // device priority does not support change on-the-fly
        return decltype(ov::device::priorities)::value_type(m_context->m_str_devices);
--- a/src/plugins/auto/src/cumulative_schedule.cpp
+++ b/src/plugins/auto/src/cumulative_schedule.cpp
@@ -10,6 +10,25 @@
 // ------------------------------CumuSchedule----------------------------
 namespace ov {
 namespace auto_plugin {
+std::string CumuSchedule::schedule_to_next_device(const std::vector<DeviceInformation>& devices,
+                                                  std::size_t current_device_index) {
+    auto schedule_policy = m_context->m_schedule_policy;
+    if (schedule_policy == ov::intel_auto::SchedulePolicy::DEVICE_PRIORITY) {
+        return devices[current_device_index].device_name;  // the caller walks the priority list itself
+    }
+    // Read and advance the shared cursor inside one critical section: if the lock were released in
+    // between, concurrent callers could observe the same cursor value and pick the same device.
+    std::lock_guard<std::mutex> lock(m_context->m_mutex);
+    if (m_n_ctput_schedule_next_device >= devices.size()) {
+        m_n_ctput_schedule_next_device = 0;  // wrap around once the end of the device list is passed
+    }
+    std::string selected_device_name = devices[m_n_ctput_schedule_next_device].device_name;
+    if (schedule_policy == ov::intel_auto::SchedulePolicy::ROUND_ROBIN) {
+        m_n_ctput_schedule_next_device++;  // only ROUND_ROBIN moves the cursor to the next device
+    }
+    return selected_device_name;
+}
+
 bool CumuSchedule::select_other_device(const std::string& cur_dev_name) {
    {
        std::lock_guard<std::mutex> lock(m_context->m_fallback_mutex);
@@ -209,7 +228,7 @@ bool CumuSchedule::schedule_to_worker_infer_request(ov::threading::Task pipeline
    std::unique_lock<std::mutex> lock(m_context->m_fallback_mutex);
    if (!preferred_device.empty()) {
        devices = m_context->m_device_priorities;
-       if (!deviceChecker().check_if_device_in_list<DeviceInformation>(preferred_device, devices)) {
+        if (!deviceChecker().check_if_device_in_list<DeviceInformation>(preferred_device, devices)) {
            lock.unlock();
            OPENVINO_THROW("The preferred device should be the selected device");
        }
@@ -217,14 +236,22 @@ bool CumuSchedule::schedule_to_worker_infer_request(ov::threading::Task pipeline
        devices = m_context->m_device_priorities;
    }
    lock.unlock();
-    for (auto&& device : devices) {
-        if (!preferred_device.empty() && (device.device_name != preferred_device)) {
+
+    std::size_t current_device_index = 0;
+    while (current_device_index < devices.size()) {
+        if (!preferred_device.empty() && (devices[current_device_index].device_name != preferred_device)) {
+            current_device_index++;
            continue;
        }
-        if (run_pipeline_task(pipeline_task, m_idle_worker_requests[device.device_name], preferred_device)) {
+        auto selected_device_name =
+            preferred_device.empty() ? schedule_to_next_device(devices, current_device_index) : preferred_device;
+        if (run_pipeline_task(pipeline_task, m_idle_worker_requests[selected_device_name], preferred_device)) {
            return true;
+        } else {
+            current_device_index++;
        }
    }
+
    // no vacant requests this time, storing the task to the respective queue
    if (!preferred_device.empty()) {
        m_infer_pipeline_tasks_device_specific[preferred_device]->push(std::move(pipeline_task));
--- a/src/plugins/auto/src/cumulative_schedule.hpp
+++ b/src/plugins/auto/src/cumulative_schedule.hpp
@@ -17,7 +17,9 @@ public:
    virtual ~CumuSchedule();
    std::unique_ptr<AutoCompileContext[]>      m_p_ctput_loadcontext = nullptr;
    size_t                                  m_n_ctput_devicenums = 0;
-
+    size_t                                  m_n_ctput_schedule_next_device = 0;
+    std::string schedule_to_next_device(const std::vector<DeviceInformation>& devices,
+                                        std::size_t current_device_index);
 private:
    void init() override;
    SoCompiledModel wait_first_compiled_model_ready() override;
--- a/src/plugins/auto/src/plugin.cpp
+++ b/src/plugins/auto/src/plugin.cpp
@@ -555,6 +555,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model_impl(const std::string
    auto_s_context->m_startup_fallback = load_config.get_property(ov::intel_auto::enable_startup_fallback);
    auto_s_context->m_runtime_fallback = load_config.get_property(ov::intel_auto::enable_runtime_fallback);
    auto_s_context->m_bind_buffer = load_config.get_property(ov::intel_auto::device_bind_buffer);
+    auto_s_context->m_schedule_policy = load_config.get_property(ov::intel_auto::schedule_policy);
    std::shared_ptr<ov::ICompiledModel> impl;
    std::shared_ptr<Schedule> scheduler = is_cumulative ? std::static_pointer_cast<Schedule>(std::make_shared<CumuSchedule>()) :
                                std::static_pointer_cast<Schedule>(std::make_shared<AutoSchedule>());
--- a/src/plugins/auto/src/plugin_config.cpp
+++ b/src/plugins/auto/src/plugin_config.cpp
@@ -21,6 +21,7 @@ void PluginConfig::set_default() {
        std::make_tuple(ov::hint::model_priority, ov::hint::Priority::MEDIUM),
        std::make_tuple(ov::log::level, ov::log::Level::NO),
        std::make_tuple(ov::intel_auto::device_bind_buffer, false),
+        std::make_tuple(ov::intel_auto::schedule_policy, ov::intel_auto::SchedulePolicy::DEVICE_PRIORITY),
        std::make_tuple(ov::hint::performance_mode, ov::hint::PerformanceMode::LATENCY),
        std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::PERFORMANCE),
        std::make_tuple(ov::hint::num_requests, 0, UnsignedTypeValidator()),
--- a/src/plugins/auto/tests/functional/behavior/infer_schedule_test.cpp
+++ b/src/plugins/auto/tests/functional/behavior/infer_schedule_test.cpp
@@ -0,0 +1,96 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "auto_func_test.hpp"
+
+namespace ov {
+namespace auto_plugin {
+namespace tests {
+using schedule_policy_param = std::tuple<ov::AnyMap,  // properties with schedule policy setting
+                                         int          // number of created infer requests
+                                         >;
+
+class InferSchedulePolicyTest : public AutoFuncTests, public testing::WithParamInterface<schedule_policy_param> {
+public:
+    void SetUp() override {
+        AutoFuncTests::SetUp();
+        std::tie(property, niters) = this->GetParam();
+    }
+    static std::string getTestCaseName(const testing::TestParamInfo<schedule_policy_param>& obj) {
+        ov::AnyMap property;
+        int niters;
+        std::tie(property, niters) = obj.param;
+        std::ostringstream result;
+        result << "numberOfInfer=" << niters << "_";
+        if (!property.empty()) {
+            for (auto& iter : property) {
+                result << "priority=" << iter.first << "_" << iter.second.as<std::string>();
+            }
+        }
+        return result.str();
+    }
+
+public:
+    ov::AnyMap property;
+    int niters;
+};
+
+TEST_P(InferSchedulePolicyTest, can_run_async_requests_with_different_schedule_policy) {
+    ov::CompiledModel compiled_model;
+    property.emplace(ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT));
+    ASSERT_NO_THROW(compiled_model = core.compile_model(model_cannot_batch, "AUTO", property));
+    std::vector<ov::InferRequest> inferReqsQueue;
+    int count = niters;
+    while (count--) {
+        ov::InferRequest req;
+        ASSERT_NO_THROW(req = compiled_model.create_infer_request());
+        inferReqsQueue.push_back(req);
+    }
+    for (auto& req : inferReqsQueue) {
+        ASSERT_NO_THROW(req.start_async());
+    }
+    for (auto& req : inferReqsQueue) {
+        ASSERT_NO_THROW(req.wait());
+    }
+}
+
+TEST_P(InferSchedulePolicyTest, can_run_sync_requests_with_different_schedule_policy) {
+    ov::CompiledModel compiled_model;
+    property.emplace(ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT));
+    ASSERT_NO_THROW(compiled_model = core.compile_model(model_cannot_batch, "AUTO", property));
+    std::vector<ov::InferRequest> inferReqsQueue;
+    int count = niters;
+    while (count--) {
+        ov::InferRequest req;
+        ASSERT_NO_THROW(req = compiled_model.create_infer_request());
+        inferReqsQueue.push_back(req);
+    }
+    for (auto& req : inferReqsQueue) {
+        ASSERT_NO_THROW(req.infer());
+        ASSERT_NO_THROW(req.wait());
+    }
+}
+
+auto properties = std::vector<ov::AnyMap>{
+    {ov::device::priorities("MOCK_GPU"), ov::intel_auto::schedule_policy(ov::intel_auto::SchedulePolicy::ROUND_ROBIN)},
+    {ov::device::priorities("MOCK_GPU"),
+     ov::intel_auto::schedule_policy(ov::intel_auto::SchedulePolicy::DEVICE_PRIORITY)},
+    {ov::device::priorities("MOCK_CPU"), ov::intel_auto::schedule_policy(ov::intel_auto::SchedulePolicy::ROUND_ROBIN)},
+    {ov::device::priorities("MOCK_CPU"),
+     ov::intel_auto::schedule_policy(ov::intel_auto::SchedulePolicy::DEVICE_PRIORITY)},
+    {ov::device::priorities("MOCK_GPU", "MOCK_CPU"),
+     ov::intel_auto::schedule_policy(ov::intel_auto::SchedulePolicy::ROUND_ROBIN)},
+    {ov::device::priorities("MOCK_GPU", "MOCK_CPU"),
+     ov::intel_auto::schedule_policy(ov::intel_auto::SchedulePolicy::DEVICE_PRIORITY)},
+    {ov::device::priorities("MOCK_CPU", "MOCK_GPU"),
+     ov::intel_auto::schedule_policy(ov::intel_auto::SchedulePolicy::ROUND_ROBIN)}};
+auto niters = std::vector<int>{10, 20, 30};
+
+INSTANTIATE_TEST_SUITE_P(AutoFuncTests,
+                         InferSchedulePolicyTest,
+                         ::testing::Combine(::testing::ValuesIn(properties), ::testing::ValuesIn(niters)),
+                         InferSchedulePolicyTest::getTestCaseName);
+}  // namespace tests
+}  // namespace auto_plugin
+}  // namespace ov
--- a/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp
+++ b/src/plugins/auto/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp
@@ -80,13 +80,15 @@ INSTANTIATE_TEST_SUITE_P(smoke_AutoCompileModelBehaviorTests,
                                            ::testing::ValuesIn(auto_compileModel_properties)),
                         OVSetPropComplieModleGetPropTests::getTestCaseName);

-const std::vector<ov::AnyMap> default_properties = {{ov::enable_profiling(false)},
-                                                    {ov::log::level("LOG_NONE")},
-                                                    {ov::hint::model_priority(ov::hint::Priority::MEDIUM)},
-                                                    {ov::hint::execution_mode(ov::hint::ExecutionMode::PERFORMANCE)},
-                                                    {ov::intel_auto::device_bind_buffer(false)},
-                                                    {ov::intel_auto::enable_startup_fallback(true)},
-                                                    {ov::device::priorities("")}};
+const std::vector<ov::AnyMap> default_properties = {
+    {ov::enable_profiling(false)},
+    {ov::log::level("LOG_NONE")},
+    {ov::hint::model_priority(ov::hint::Priority::MEDIUM)},
+    {ov::hint::execution_mode(ov::hint::ExecutionMode::PERFORMANCE)},
+    {ov::intel_auto::device_bind_buffer(false)},
+    {ov::intel_auto::enable_startup_fallback(true)},
+    {ov::intel_auto::schedule_policy(ov::intel_auto::SchedulePolicy::DEVICE_PRIORITY)},
+    {ov::device::priorities("")}};
 INSTANTIATE_TEST_SUITE_P(smoke_AutoBehaviorTests,
                         OVPropertiesDefaultTests,
                         ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_AUTO),
--- a/src/plugins/auto/tests/unit/infer_request_schedule_policy_test.cpp
+++ b/src/plugins/auto/tests/unit/infer_request_schedule_policy_test.cpp
@@ -0,0 +1,142 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "async_infer_request.hpp"
+#include "common.hpp"
+#include "cumulative_schedule.hpp"
+#include "openvino/runtime/auto/properties.hpp"
+#include "plugin.hpp"
+using ConfigParams = std::tuple<std::vector<ov::auto_plugin::DeviceInformation>,  // device candidate list
+                                ov::intel_auto::SchedulePolicy,                   // schedule policy
+                                std::map<std::string, int>,  // number of infer requests on each device
+                                std::vector<std::string>  // the expected device that each infer request comes from
+                                >;
+class MockCumuSchedule : public ov::auto_plugin::CumuSchedule, public ::testing::TestWithParam<ConfigParams> {
+protected:
+    std::vector<ov::auto_plugin::DeviceInformation> devicesInfo;
+    ov::intel_auto::SchedulePolicy schedulePolicy;
+    std::map<std::string, int> numOfInferRequests;
+    std::vector<std::string> expectedScheDevs;
+
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<ConfigParams> obj) {
+        std::vector<ov::auto_plugin::DeviceInformation> devicesInfo;
+        ov::intel_auto::SchedulePolicy schedulePolicy;
+        std::map<std::string, int> numOfInferRequests;
+        std::vector<std::string> expectedScheDevs;
+        std::tie(devicesInfo, schedulePolicy, numOfInferRequests, expectedScheDevs) = obj.param;
+        std::ostringstream result;
+        std::string candidateDevList;
+        result << "candaidateDeviceList_";
+        for (auto dev : devicesInfo)
+            result << dev.device_name << "_";
+        result << "schedulePolicy_" << schedulePolicy << "_";
+        result << "inferRequestNumberOnEachDevice_";
+        for (auto ninfer : numOfInferRequests)
+            result << ninfer.first << "_" << ninfer.second << "_";
+        result << "expectedDeviceSelection_";
+        for (auto dev : expectedScheDevs)
+            result << dev << "_";
+        return result.str();
+    }
+
+    void TearDown() override {
+        devicesInfo.clear();
+        numOfInferRequests.clear();
+        expectedScheDevs.clear();
+        m_context.reset();
+    }
+
+    void SetUp() override {
+        std::tie(devicesInfo, schedulePolicy, numOfInferRequests, expectedScheDevs) = GetParam();
+        m_context = std::make_shared<ov::auto_plugin::ScheduleContext>();
+        m_context->m_schedule_policy = schedulePolicy;
+    }
+};
+
+TEST_P(MockCumuSchedule, scheduleInferRequestBasedOnSchedulePolicy) {
+    std::size_t deviceIndexWithInferReq = 0;
+    int expectedDevIndex = 0;
+    while (true) {
+        std::string actualSelectedDev;
+        ASSERT_NO_THROW(actualSelectedDev = schedule_to_next_device(devicesInfo, deviceIndexWithInferReq));
+        if (numOfInferRequests[actualSelectedDev] > 0) {
+            EXPECT_EQ(actualSelectedDev, expectedScheDevs[expectedDevIndex++]);
+            // consume an available infer request on selected device
+            numOfInferRequests[actualSelectedDev]--;
+        } else {
+            // schedule to the next priority device
+            deviceIndexWithInferReq++;
+            if (deviceIndexWithInferReq >= devicesInfo.size()) {
+                // no available infer request on all of the devices
+                break;
+            }
+        }
+    }
+}
+
+const std::vector<ov::auto_plugin::DeviceInformation> metaDevicesWithSingleDev = {
+    {"DEVICE_0", {}, -1, "01", "DEVICE_0_01", 0}};
+const std::vector<ov::auto_plugin::DeviceInformation> metaDevicesWithTwoDevs = {
+    {"DEVICE_0", {}, -1, "01", "DEVICE_0_01", 0},
+    {"DEVICE_1", {}, -1, "01", "DEVICE_1_01", 1}};
+const std::vector<ov::auto_plugin::DeviceInformation> metaDevices = {{"DEVICE_0", {}, -1, "01", "DEVICE_0_01", 0},
+                                                                     {"DEVICE_1", {}, -1, "01", "DEVICE_1_01", 1},
+                                                                     {"DEVICE_2", {}, -1, "01", "DEVICE_2_01", 2}};
+const std::vector<ConfigParams> configs = {
+    ConfigParams{
+        metaDevicesWithSingleDev,                     // param[in]: device candidate list for AUTO plugin
+        ov::intel_auto::SchedulePolicy::ROUND_ROBIN,  // param[in]: specified schedule policy
+        {{"DEVICE_0", 6}},  // param[in]: a map recording the number of infer requests on each hw device
+        {"DEVICE_0",
+         "DEVICE_0",
+         "DEVICE_0",
+         "DEVICE_0",
+         "DEVICE_0",
+         "DEVICE_0"}},  // param[output]: the expected device list where the next available infer request comes from
+    ConfigParams{metaDevicesWithSingleDev,
+                 ov::intel_auto::SchedulePolicy::DEVICE_PRIORITY,
+                 {{"DEVICE_0", 6}},
+                 {"DEVICE_0", "DEVICE_0", "DEVICE_0", "DEVICE_0", "DEVICE_0", "DEVICE_0"}},
+    ConfigParams{metaDevicesWithTwoDevs,
+                 ov::intel_auto::SchedulePolicy::ROUND_ROBIN,
+                 {{"DEVICE_0", 3}, {"DEVICE_1", 2}},
+                 {"DEVICE_0", "DEVICE_1", "DEVICE_0", "DEVICE_1", "DEVICE_0"}},
+    ConfigParams{metaDevicesWithTwoDevs,
+                 ov::intel_auto::SchedulePolicy::DEVICE_PRIORITY,
+                 {{"DEVICE_0", 3}, {"DEVICE_1", 2}},
+                 {"DEVICE_0", "DEVICE_0", "DEVICE_0", "DEVICE_1", "DEVICE_1"}},
+    ConfigParams{metaDevicesWithTwoDevs,
+                 ov::intel_auto::SchedulePolicy::DEVICE_PRIORITY,
+                 {{"DEVICE_0", 2}, {"DEVICE_1", 3}},
+                 {"DEVICE_0", "DEVICE_0", "DEVICE_1", "DEVICE_1", "DEVICE_1"}},
+    ConfigParams{metaDevices,
+                 ov::intel_auto::SchedulePolicy::ROUND_ROBIN,
+                 {{"DEVICE_0", 3}, {"DEVICE_1", 2}, {"DEVICE_2", 1}},
+                 {"DEVICE_0", "DEVICE_1", "DEVICE_2", "DEVICE_0", "DEVICE_1", "DEVICE_0"}},
+    ConfigParams{metaDevices,
+                 ov::intel_auto::SchedulePolicy::ROUND_ROBIN,
+                 {{"DEVICE_0", 1}, {"DEVICE_1", 2}, {"DEVICE_2", 3}},
+                 {"DEVICE_0", "DEVICE_1", "DEVICE_2", "DEVICE_1", "DEVICE_2", "DEVICE_2"}},
+    ConfigParams{metaDevices,
+                 ov::intel_auto::SchedulePolicy::ROUND_ROBIN,
+                 {{"DEVICE_0", 1}, {"DEVICE_1", 3}, {"DEVICE_2", 2}},
+                 {"DEVICE_0", "DEVICE_1", "DEVICE_2", "DEVICE_1", "DEVICE_2", "DEVICE_1"}},
+    ConfigParams{metaDevices,
+                 ov::intel_auto::SchedulePolicy::DEVICE_PRIORITY,
+                 {{"DEVICE_0", 1}, {"DEVICE_1", 3}, {"DEVICE_2", 2}},
+                 {"DEVICE_0", "DEVICE_1", "DEVICE_1", "DEVICE_1", "DEVICE_2", "DEVICE_2"}},
+    ConfigParams{metaDevices,
+                 ov::intel_auto::SchedulePolicy::DEVICE_PRIORITY,
+                 {{"DEVICE_0", 3}, {"DEVICE_1", 2}, {"DEVICE_2", 1}},
+                 {"DEVICE_0", "DEVICE_0", "DEVICE_0", "DEVICE_1", "DEVICE_1", "DEVICE_2"}}};
+
+INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests,
+                         MockCumuSchedule,
+                         ::testing::ValuesIn(configs),
+                         MockCumuSchedule::getTestCaseName);