From 1e2b7c66f7b0ca235aed201ce771cce94b912bbd Mon Sep 17 00:00:00 2001
From: "Wang, Yang"
Date: Mon, 4 Dec 2023 10:51:10 +0800
Subject: [PATCH] [AUTO] Update AUTO logic to support stateful model (#21061)

* 1. Update the logic for filtering out the devices that support the stateful model.
  2. Enable the function to create a stateful model in the test case.

* 1. Enable unit test cases for stateful model support.
  2. Disable the accelerator device (CPU_HELP) if the model is stateful.

Signed-off-by: Wang, Yang

* Update.

* Updated.

* Updated.

* Updated.

---------

Signed-off-by: Wang, Yang
Co-authored-by: Chen Peter
---
 src/plugins/auto/src/plugin.cpp               |  74 +++-
 src/plugins/auto/src/plugin.hpp               |   3 +-
 .../auto/tests/unit/stateful_model_test.cpp   | 415 ++++++++++++++++++
 3 files changed, 476 insertions(+), 16 deletions(-)
 create mode 100644 src/plugins/auto/tests/unit/stateful_model_test.cpp

diff --git a/src/plugins/auto/src/plugin.cpp b/src/plugins/auto/src/plugin.cpp
index 59e79222540..5d612a54216 100644
--- a/src/plugins/auto/src/plugin.cpp
+++ b/src/plugins/auto/src/plugin.cpp
@@ -464,7 +464,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model_impl(const std::string
     std::list<DeviceInformation> devices_with_priority(support_devices.begin(), support_devices.end());
     std::shared_ptr<ov::Model> cloned_model, ppp_model;
     if (model_path.empty()) {
-        support_devices = filter_device_by_model(support_devices_by_property, model);
+        support_devices = filter_device_by_model(support_devices_by_property, model, load_config);
         cloned_model = model->clone();
         ppp_model = cloned_model->clone();
@@ -910,39 +910,83 @@ std::vector<DeviceInformation> Plugin::filter_device(const std::vector<DeviceInformation
 
 std::vector<DeviceInformation> Plugin::filter_device_by_model(const std::vector<DeviceInformation>& meta_devices,
-                                                              const std::shared_ptr<const ov::Model>& model) const {
+                                                              const std::shared_ptr<const ov::Model>& model,
+                                                              PluginConfig& load_config) const {
     if (meta_devices.empty()) {
         OPENVINO_THROW("No available device to filter ", get_device_name(), " plugin");
     }
-    std::vector<DeviceInformation> filter_device;
-    auto is_stateful = [&]() {
-        for (auto& op : model->get_ops()) {
-            if (std::dynamic_pointer_cast<ov::op::util::AssignBase>(op) ||
-                std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(op)) {
-                LOG_INFO_TAG("stateful mode, try deployed to CPU");
-                return true;
-            }
+    auto disable_startup_runtime_fallback = [&]() {
+        if (load_config.get_property(ov::intel_auto::enable_startup_fallback)) {
+            LOG_WARNING_TAG("Setting property ov::intel_auto::enable_startup_fallback to false for stateful model.");
+            load_config.set_property(ov::intel_auto::enable_startup_fallback(false));
+        }
+        if (load_config.get_property(ov::intel_auto::enable_runtime_fallback)) {
+            LOG_WARNING_TAG("Setting property ov::intel_auto::enable_runtime_fallback to false for stateful model.");
+            load_config.set_property(ov::intel_auto::enable_runtime_fallback(false));
+        }
-        return false;
     };
+    if (meta_devices.size() == 1) {
+        return meta_devices;
+    }
+
+    std::vector<DeviceInformation> filter_device;
+    std::vector<std::string> stateful_node_names;
+
     // Check if CPU is in candidate list
     auto cpuiter = std::find_if(meta_devices.begin(), meta_devices.end(), [](const DeviceInformation& device_info) {
         return device_info.device_name.find("CPU") != std::string::npos;
     });
-
     // If CPU is in candidate list, load dynamic model to CPU first
+    // For MULTI, do not load the stateful model to CPU only
+    // For AUTO CTPUT, load the stateful model to CPU only
-    if (((model->is_dynamic()) || (is_stateful() && get_device_name() != "MULTI")) && cpuiter != meta_devices.end()) {
+    if (model->is_dynamic() && cpuiter != meta_devices.end()) {
        filter_device.push_back(*cpuiter);
        return filter_device;
     }
-    // If CPU is not in candidate list, continue to run selection logic regardless of whether the input model is a
-    // dynamic model or not
-    return meta_devices;
+
+    for (auto& op : model->get_ops()) {
+        if (std::dynamic_pointer_cast<ov::op::util::AssignBase>(op) ||
+            std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(op)) {
+            stateful_node_names.push_back(op->get_friendly_name());
+        }
+    }
+    if (stateful_node_names.empty()) {
+        // not a stateful model
+        return meta_devices;
+    }
+
+    // disable CPU_HELP and runtime fallback if model is stateful
+    disable_startup_runtime_fallback();
+
+    auto is_supported_stateful = [&](const std::string& device_name, const ov::AnyMap& config) {
+        auto device_qm = get_core()->query_model(model, device_name, config);
+        for (auto&& node_name : stateful_node_names) {
+            if (device_qm.find(node_name) == device_qm.end())
+                return false;
+        }
+        return true;
+    };
+
+    for (auto& item : meta_devices) {
+        if (is_supported_stateful(item.device_name, item.config))
+            filter_device.push_back(item);
+    }
+    bool isCumulative = (get_device_name() == "MULTI") || (load_config.get_property(ov::hint::performance_mode) ==
+                                                           ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT);
+    if (isCumulative) {
+        if (filter_device.empty() || filter_device.size() > 1)
+            OPENVINO_THROW("AUTO cumulative mode doesn't support stateful model.");
+        else
+            return filter_device;
+    }
+    if (filter_device.empty()) {
+        return meta_devices;
+    }
+    return filter_device;
 }
 
 std::string Plugin::get_log_tag() const noexcept {
diff --git a/src/plugins/auto/src/plugin.hpp b/src/plugins/auto/src/plugin.hpp
index a41b1113084..47e7e85a9df 100644
--- a/src/plugins/auto/src/plugin.hpp
+++ b/src/plugins/auto/src/plugin.hpp
@@ -78,7 +78,8 @@ private:
     std::vector<DeviceInformation> filter_device(const std::vector<DeviceInformation>& meta_devices,
                                                  const ov::AnyMap& properties) const;
     std::vector<DeviceInformation> filter_device_by_model(const std::vector<DeviceInformation>& meta_devices,
-                                                          const std::shared_ptr<const ov::Model>& model) const;
+                                                          const std::shared_ptr<const ov::Model>& model,
+                                                          PluginConfig& load_config) const;
     std::string get_log_tag() const noexcept;
     static std::mutex m_mtx;
     static std::map<unsigned int, std::list<std::string>> m_priority_map;
diff --git a/src/plugins/auto/tests/unit/stateful_model_test.cpp b/src/plugins/auto/tests/unit/stateful_model_test.cpp
new file mode 100644
index 00000000000..8c4e8206b1c
--- /dev/null
+++ b/src/plugins/auto/tests/unit/stateful_model_test.cpp
@@ -0,0 +1,415 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "include/auto_unit_test.hpp"
+#include "openvino/opsets/opset11.hpp"
+
+using StatefulModelConfigParams =
+    std::tuple<std::string,                              // device candidate list
+               bool,                                     // is dynamic model
+               bool,                                     // is stateful model
+               std::map<std::string, bool>,              // device and the flag if device supports stateful
+               bool,                                     // is cumulative mode
+               std::vector<std::pair<std::string, int>>  // expected compiling model times on each device
+               >;
+
+class StatefulModelSupportedTest : public tests::AutoTest, public ::testing::TestWithParam<StatefulModelConfigParams> {
+public:
+    std::shared_ptr<ov::Model> create_dynamic_output_model();
+    std::shared_ptr<ov::Model> create_stateful_model();
+    static std::string getTestCaseName(testing::TestParamInfo<StatefulModelConfigParams> obj);
+    void SetUp() override;
+
+protected:
+    bool isDynamicModel;
+    bool isStatefulModel;
+    std::map<std::string, bool> isDevSupportStatefulMap;
+    std::vector<std::pair<std::string, int>> expectedCalledTimes;
+    bool isCumulative;
+    std::string devicesList;
+};
+
+std::string StatefulModelSupportedTest::getTestCaseName(testing::TestParamInfo<StatefulModelConfigParams> obj) {
+    bool isDynamicModel;
+    bool isStatefulModel;
+    std::map<std::string, bool> isDevSupportStatefulMap;
+    std::vector<std::pair<std::string, int>> expectedCalledTimes;
+    bool isCumulative;
+    std::string devicesList;
+
+    std::tie(devicesList, isDynamicModel, isStatefulModel, isDevSupportStatefulMap, isCumulative, expectedCalledTimes) =
+        obj.param;
+    std::ostringstream result;
+    result << "_devicesList_" << devicesList;
+    result << "_isDynamic_" << isDynamicModel;
+    result << "_isStatefulModel_" << isStatefulModel;
+    for (auto& item : isDevSupportStatefulMap) {
+        result << "_" << item.first << "_" << item.second;
+    }
+    result << "_isCumulative_" << isCumulative;
+    for (auto& item : expectedCalledTimes) {
+        result << "_calling_on_" << item.first << "_expected_times_" << item.second;
+    }
+    auto string = result.str();
+    return string;
+}
+
+std::shared_ptr<ov::Model> StatefulModelSupportedTest::create_dynamic_output_model() {
+    auto boxes = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 2, 4});
+    boxes->set_friendly_name("param_1");
+    boxes->get_output_tensor(0).set_names({"input_tensor_1"});
+    auto scores = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 1, 2});
+    scores->set_friendly_name("param_2");
+    scores->get_output_tensor(0).set_names({"input_tensor_2"});
+    auto max_output_boxes_per_class = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {10});
+    auto iou_threshold = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {0.75});
+    auto score_threshold = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {0.7});
+    auto nms = std::make_shared<ov::op::v9::NonMaxSuppression>(boxes,
+                                                               scores,
+                                                               max_output_boxes_per_class,
+                                                               iou_threshold,
+                                                               score_threshold);
+    auto res = std::make_shared<ov::op::v0::Result>(nms);
+    res->set_friendly_name("output_dynamic");
+    return std::make_shared<ov::Model>(ov::NodeVector{nms}, ov::ParameterVector{boxes, scores});
+}
+
+std::shared_ptr<ov::Model> StatefulModelSupportedTest::create_stateful_model() {
+    auto arg = std::make_shared<ov::opset11::Parameter>(ov::element::f32, ov::Shape{1, 1});
+    auto init_const = ov::opset11::Constant::create(ov::element::f32, ov::Shape{1, 1}, {0});
+    // The ReadValue/Assign operations must be used in pairs in the model.
+    // For each such a pair, its own variable object must be created.
+    const std::string variable_name("variable0");
+    // auto variable = std::make_shared<ov::op::util::Variable>(
+    //     ov::op::util::VariableInfo{ov::PartialShape::dynamic(), ov::element::dynamic, variable_name});
+    auto variable = std::make_shared<ov::op::util::Variable>(
+        ov::op::util::VariableInfo{init_const->get_shape(), ov::element::f32, variable_name});
+    // Creating ov::Model
+    auto read = std::make_shared<ov::opset11::ReadValue>(init_const, variable);
+    std::vector<std::shared_ptr<ov::Node>> args = {arg, read};
+    auto add = std::make_shared<ov::opset11::Add>(arg, read);
+    add->set_friendly_name("add_sum");
+    auto assign = std::make_shared<ov::opset11::Assign>(add, variable);
+    assign->set_friendly_name("save");
+    auto res = std::make_shared<ov::opset11::Result>(add);
+    res->set_friendly_name("res");
+
+    auto model =
+        std::make_shared<ov::Model>(ov::ResultVector({res}), ov::SinkVector({assign}), ov::ParameterVector({arg}));
+
+    return model;
+}
+
+void StatefulModelSupportedTest::SetUp() {
+    std::tie(devicesList, isDynamicModel, isStatefulModel, isDevSupportStatefulMap, isCumulative, expectedCalledTimes) =
+        GetParam();
+    if (isDynamicModel) {
+        model = create_dynamic_output_model();
+    } else if (isStatefulModel) {
+        model = create_stateful_model();
+    }
+
+    std::map<std::string, ov::SupportedOpsMap> devicesSupportedLayers;
+    for (auto& item : isDevSupportStatefulMap) {
+        ov::SupportedOpsMap res;
+        auto deviceName = item.first;
+        auto isSupportStateful = item.second;
+        std::unordered_set<std::string> device_supported_layers;
+        for (auto& op : model->get_ops()) {
+            if (!std::dynamic_pointer_cast<ov::op::util::AssignBase>(op) &&
+                !std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(op)) {
+                res[op->get_friendly_name()] = deviceName;
+                continue;
+            }
+            if (isSupportStateful) {
+                res[op->get_friendly_name()] = deviceName;
+            }
+        }
+        devicesSupportedLayers[deviceName] = res;
+    }
+
+    for (auto& item : devicesSupportedLayers) {
+        ON_CALL(*core,
+                query_model(::testing::Matcher<const std::shared_ptr<const ov::Model>&>(_),
+                            ::testing::Matcher<const std::string&>(StrEq(item.first)),
+                            _))
+            .WillByDefault(Return(item.second));
+    }
+
+    ON_CALL(*core,
+            compile_model(::testing::Matcher<const std::shared_ptr<const ov::Model>&>(_),
+                          ::testing::Matcher<const std::string&>(StrEq(ov::test::utils::DEVICE_CPU)),
+                          (_)))
+        .WillByDefault(Return(mockExeNetwork));
+
+    ON_CALL(*core,
+            compile_model(::testing::Matcher<const std::shared_ptr<const ov::Model>&>(_),
+                          ::testing::Matcher<const std::string&>(StrEq(ov::test::utils::DEVICE_GPU)),
+                          (_)))
+        .WillByDefault(Return(mockExeNetworkActual));
+    if (isCumulative)
+        plugin->set_device_name("MULTI");
+    else
+        plugin->set_device_name("AUTO");
+}
+
+TEST_P(StatefulModelSupportedTest, CanFilterOutCorrectTargetDeviceWithStatefulModel) {
+    metaDevices.clear();
+    int priority = 0;
+    for (auto& item : expectedCalledTimes) {
+        auto deviceName = item.first;
+        auto times = item.second;
+        DeviceInformation devInfo(deviceName, {}, -1, {}, deviceName, priority++);
+        metaDevices.push_back(devInfo);
+        if (times >= 0) {
+            EXPECT_CALL(*core,
+                        compile_model(::testing::Matcher<const std::shared_ptr<const ov::Model>&>(_),
+                                      ::testing::Matcher<const std::string&>(StrEq(deviceName)),
+                                      ::testing::Matcher<const ov::AnyMap&>(_)))
+                .Times(times);
+        }
+    }
+    int expectedTimes = expectedCalledTimes.begin()->second;
+    ov::AnyMap config = {};
+
+    if (!devicesList.empty())
+        config.insert(ov::device::priorities(devicesList));
+
+    ON_CALL(*plugin, parse_meta_devices(_, _)).WillByDefault(Return(metaDevices));
+    ON_CALL(*plugin, get_valid_device)
+        .WillByDefault([](const std::vector<DeviceInformation>& metaDevices, const std::string& netPrecision) {
+            std::list<DeviceInformation> devices(metaDevices.begin(), metaDevices.end());
+            return devices;
+        });
+
+    config.insert(ov::intel_auto::enable_runtime_fallback(false));
+    if (isCumulative) {
+        config.insert(ov::hint::performance_mode(ov::hint::PerformanceMode::CUMULATIVE_THROUGHPUT));
+    }
+    if (expectedTimes < 0) {
+        ASSERT_THROW(plugin->compile_model(model, config), ov::Exception);
+    } else {
+        ASSERT_NO_THROW(plugin->compile_model(model, config));
+    }
+}
+
+const std::vector<StatefulModelConfigParams> testConfigs = {
+    // test cases for dynamic model
+    StatefulModelConfigParams{
+        "CPU",                                                  // device candidate list is CPU
+        true,                                                   // model is dynamic model
+        true,                                                   // model is stateful model
+        std::map<std::string, bool>{{"CPU", true}},             // device CPU supports stateful model
+        true,                                                   // performance mode is cumulative mode
+        std::vector<std::pair<std::string, int>>{{"CPU", 1}}},  // expected compiling model count is 1 on device CPU
+    StatefulModelConfigParams{"CPU",
+                              true,
+                              false,
+                              std::map<std::string, bool>{{"CPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}}},
+    StatefulModelConfigParams{"CPU",
+                              true,
+                              true,
+                              std::map<std::string, bool>{{"CPU", false}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}}},
+    StatefulModelConfigParams{"CPU",
+                              true,
+                              true,
+                              std::map<std::string, bool>{{"CPU", true}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}}},
+    StatefulModelConfigParams{"CPU",
+                              true,
+                              true,
+                              std::map<std::string, bool>{{"CPU", false}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}}},
+
+    StatefulModelConfigParams{"GPU",
+                              true,
+                              false,
+                              std::map<std::string, bool>{{"GPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}}},
+    StatefulModelConfigParams{"GPU",
+                              true,
+                              false,
+                              std::map<std::string, bool>{{"GPU", false}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}}},
+    StatefulModelConfigParams{"GPU",
+                              true,
+                              false,
+                              std::map<std::string, bool>{{"GPU", true}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}}},
+
+    StatefulModelConfigParams{"CPU,GPU",
+                              true,
+                              true,
+                              std::map<std::string, bool>{{"CPU", true}, {"GPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}, {"GPU", 0}}},
+    StatefulModelConfigParams{"GPU,CPU",
+                              true,
+                              true,
+                              std::map<std::string, bool>{{"CPU", true}, {"GPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 0}, {"CPU", 1}}},
+    StatefulModelConfigParams{"CPU,GPU",
+                              true,
+                              false,
+                              std::map<std::string, bool>{{"CPU", true}, {"GPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}, {"GPU", 0}}},
+    StatefulModelConfigParams{"GPU,CPU",
+                              true,
+                              false,
+                              std::map<std::string, bool>{{"CPU", true}, {"GPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 0}, {"CPU", 1}}},
+    StatefulModelConfigParams{"CPU",
+                              false,
+                              false,
+                              std::map<std::string, bool>{{"CPU", true}, {"GPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}}},
+    StatefulModelConfigParams{"CPU",
+                              false,
+                              false,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", true}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}}},
+    StatefulModelConfigParams{"GPU",
+                              false,
+                              false,
+                              std::map<std::string, bool>{{"CPU", true}, {"GPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}}},
+    StatefulModelConfigParams{"GPU",
+                              false,
+                              false,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", true}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}}},
+    StatefulModelConfigParams{"CPU,GPU",
+                              false,
+                              false,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", true}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}, {"GPU", 0}}},
+    StatefulModelConfigParams{"CPU,GPU",
+                              false,
+                              false,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}, {"GPU", 1}}},
+    StatefulModelConfigParams{"GPU,CPU",
+                              false,
+                              false,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", true}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}, {"CPU", 1}}},
+    StatefulModelConfigParams{"GPU,CPU",
+                              false,
+                              false,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}, {"CPU", 1}}},
+    StatefulModelConfigParams{"CPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}}},
+    StatefulModelConfigParams{"CPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", true}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}}},
+    StatefulModelConfigParams{"CPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", true}, {"GPU", false}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}}},
+    StatefulModelConfigParams{"GPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", true}, {"GPU", false}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}}},
+    StatefulModelConfigParams{"GPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}}},
+    StatefulModelConfigParams{"GPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", true}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}}},
+    StatefulModelConfigParams{"CPU,GPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", true}, {"GPU", false}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"CPU", 1}, {"GPU", 0}}},
+    StatefulModelConfigParams{"CPU,GPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", true}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}, {"CPU", 0}}},
+    StatefulModelConfigParams{"CPU,GPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", true}, {"GPU", true}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}, {"CPU", 0}}},
+    StatefulModelConfigParams{"CPU,GPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", false}},
+                              false,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}, {"CPU", 0}}},
+
+    StatefulModelConfigParams{"CPU,GPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", true}, {"GPU", false}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 0}, {"CPU", 1}}},
+    StatefulModelConfigParams{"CPU,GPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"GPU", 1}, {"CPU", 0}}},
+    StatefulModelConfigParams{"CPU,GPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", true}, {"GPU", true}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"GPU", -1}, {"CPU", -1}}},
+    StatefulModelConfigParams{"CPU,GPU",
+                              false,
+                              true,
+                              std::map<std::string, bool>{{"CPU", false}, {"GPU", false}},
+                              true,
+                              std::vector<std::pair<std::string, int>>{{"GPU", -1}, {"CPU", -1}}},
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests,
+                         StatefulModelSupportedTest,
+                         ::testing::ValuesIn(testConfigs),
+                         StatefulModelSupportedTest::getTestCaseName);
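
Illustrative usage (not part of the patch): a minimal sketch of how the new filtering is exercised from the application side, assuming an environment where both GPU and CPU plugins are registered. The model mirrors create_stateful_model() above; the "AUTO:GPU,CPU" priority string is only an example. With this change, AUTO queries each candidate device for the ReadValue/Assign nodes and keeps only devices that report support, while disabling the CPU_HELP startup fallback and runtime fallback; under MULTI or CUMULATIVE_THROUGHPUT, compilation throws unless exactly one candidate supports the stateful model.

#include <openvino/openvino.hpp>
#include <openvino/op/util/variable.hpp>
#include <openvino/opsets/opset11.hpp>

int main() {
    // Build a tiny stateful model: one ReadValue/Assign pair sharing a variable,
    // equivalent in spirit to create_stateful_model() in the test above.
    auto arg = std::make_shared<ov::opset11::Parameter>(ov::element::f32, ov::Shape{1, 1});
    auto init = ov::opset11::Constant::create(ov::element::f32, ov::Shape{1, 1}, {0});
    auto variable = std::make_shared<ov::op::util::Variable>(
        ov::op::util::VariableInfo{init->get_shape(), ov::element::f32, "variable0"});
    auto read = std::make_shared<ov::opset11::ReadValue>(init, variable);
    auto add = std::make_shared<ov::opset11::Add>(arg, read);
    auto assign = std::make_shared<ov::opset11::Assign>(add, variable);
    auto result = std::make_shared<ov::opset11::Result>(add);
    auto model = std::make_shared<ov::Model>(ov::ResultVector{result},
                                             ov::SinkVector{assign},
                                             ov::ParameterVector{arg});

    ov::Core core;
    // AUTO filters the candidate list down to devices whose query_model()
    // reports support for the stateful nodes ("AUTO:GPU,CPU" is an example
    // priority list); startup (CPU_HELP) and runtime fallback are switched off.
    auto compiled = core.compile_model(model, "AUTO:GPU,CPU");

    auto request = compiled.create_infer_request();
    request.infer();  // the Assign sink updates "variable0" between inferences
    return 0;
}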