diff --git a/src/inference/dev_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp b/src/inference/dev_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp index e6680ca8590..8e853331df2 100644 --- a/src/inference/dev_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp +++ b/src/inference/dev_api/cpp_interfaces/interface/ie_internal_plugin_config.hpp @@ -107,3 +107,14 @@ DECLARE_CONFIG_KEY(CONFIG_DEVICE_ID); } // namespace PluginConfigInternalParams } // namespace InferenceEngine + +namespace ov { + +/** + * @brief Read-only property to get a std::vector of properties + * which should affect the hash calculation for model cache + * @ingroup ie_dev_api_plugin_api + */ +static constexpr Property, PropertyMutability::RO> caching_properties{"CACHING_PROPERTIES"}; + +} // namespace ov diff --git a/src/inference/src/ie_core.cpp b/src/inference/src/ie_core.cpp index 750c53bf02f..326ed9a10b9 100644 --- a/src/inference/src/ie_core.cpp +++ b/src/inference/src/ie_core.cpp @@ -490,26 +490,24 @@ class CoreImpl : public ie::ICore, public std::enable_shared_from_this& origConfig) const { std::map getMetricConfig; - auto compileConfig = origConfig; + std::map compileConfig; - // 0. Remove TARGET_FALLBACK key, move it to getMetricConfig - auto targetFallbackIt = compileConfig.find("TARGET_FALLBACK"); - if (targetFallbackIt == compileConfig.end()) { - targetFallbackIt = compileConfig.find(ov::device::priorities.name()); + // 0. Move TARGET_FALLBACK key to getMetricConfig + auto targetFallbackIt = origConfig.find("TARGET_FALLBACK"); + if (targetFallbackIt == origConfig.end()) { + targetFallbackIt = origConfig.find(ov::device::priorities.name()); } - if (targetFallbackIt != compileConfig.end()) { + if (targetFallbackIt != origConfig.end()) { getMetricConfig[targetFallbackIt->first] = targetFallbackIt->second; - compileConfig.erase(targetFallbackIt); } - // 1. 
remove DEVICE_ID key - auto deviceIt = compileConfig.find(ov::device::id.name()); - if (deviceIt != compileConfig.end()) { + // 1. Move DEVICE_ID key to getMetricConfig + auto deviceIt = origConfig.find(ov::device::id.name()); + if (deviceIt != origConfig.end()) { getMetricConfig[deviceIt->first] = deviceIt->second; - compileConfig.erase(deviceIt); } - // 2. replace it with DEVICE_ARCHITECTURE value + // 2. Replace it with DEVICE_ARCHITECTURE value if (DeviceSupportsConfigKey(plugin, ov::device::architecture.name())) { compileConfig[ov::device::architecture.name()] = plugin.get_property(ov::device::architecture, getMetricConfig); @@ -517,6 +515,17 @@ class CoreImpl : public ie::ICore, public std::enable_shared_from_this() : it->second; + } + } return compileConfig; } diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 9e9defcdb3e..1bef54cad35 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -53,6 +53,7 @@ struct Config { void readProperties(const std::map &config); void updateProperties(); + std::map _config; #ifdef CPU_DEBUG_CAPS diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 43fefa01d9e..a720142088f 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -703,26 +703,11 @@ static bool streamsSet(const std::map& config) { } void Engine::ApplyPerformanceHints(std::map &config, const std::shared_ptr& ngraphFunc) const { - const bool streamsExplicitlySetForModel = streamsSet(config); - // checking streams (to avoid overriding what user might explicitly set in the incoming config or previously via SetConfig) - if (streamsExplicitlySetForModel || - streamsExplicitlySetForEngine) - return; + auto getNumStreamsLatency = [&]() { + return std::pair(CONFIG_VALUE(CPU_THROUGHPUT_NUMA), ov::util::to_string(ov::streams::NUMA)); + }; - const auto& mode = config.find(CONFIG_KEY(PERFORMANCE_HINT)); - // the mode 
may have just arrived to the LoadNetwork, or was set with the plugin's SetConfig - if (mode == config.end() && engConfig.perfHintsConfig.ovPerfHint.empty()) - return; - /* performance hints set for network has higher pririty than engine ones. - * This applies for all the configuration parameters */ - const auto mode_name = (mode != config.end()) ? - PerfHintsConfig::CheckPerformanceHintValue(mode->second) : - engConfig.perfHintsConfig.ovPerfHint; - - if (mode_name == CONFIG_VALUE(LATENCY)) { - config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = CONFIG_VALUE(CPU_THROUGHPUT_NUMA); - config[ov::num_streams.name()] = ov::util::to_string(ov::streams::NUMA); - } else if (mode_name == CONFIG_VALUE(THROUGHPUT)) { + auto getNumStreamsThroughput = [&]() { const auto isa = dnnl::get_effective_cpu_isa(); float isaSpecificThreshold = 1.0f; switch (isa) { @@ -797,8 +782,48 @@ void Engine::ApplyPerformanceHints(std::map &config, c num_streams = std::min(num_streams, engConfig.perfHintsConfig.ovPerfHintNumRequests); } - config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = std::to_string(num_streams); - config[ov::num_streams.name()] = ov::util::to_string(num_streams); + return std::pair(std::to_string(num_streams), ov::util::to_string(num_streams)); + }; + + auto getPerfHintName = [&]() { + const bool streamsExplicitlySetForModel = streamsSet(config); + // checking streams (to avoid overriding what user might explicitly set in the incoming config or previously via SetConfig) + if (streamsExplicitlySetForModel || + streamsExplicitlySetForEngine) + return std::string(); + + const auto& perf_hint = config.find(CONFIG_KEY(PERFORMANCE_HINT)); + // the perf_hint may have just arrived to the LoadNetwork, or was set with the plugin's SetConfig + if (perf_hint == config.end() && engConfig.perfHintsConfig.ovPerfHint.empty()) + return std::string(); + /* performance hints set for network has higher priority than engine ones. 
+ * This applies for all the configuration parameters */ + const auto perf_hint_name = (perf_hint != config.end()) ? + PerfHintsConfig::CheckPerformanceHintValue(perf_hint->second) : + engConfig.perfHintsConfig.ovPerfHint; + return perf_hint_name; + }; + + // We compute both hints values because the optimal number of streams are computed based on ov::Model + // while we export model in cpu internal opset so we need to save precomputed optimal # streams for both hint modes + const auto latency_hints = getNumStreamsLatency(); + const auto tput_hints = getNumStreamsThroughput(); + + // save hints parameters to model rt_info + ov::AnyMap hints_props; + const auto latency_name = std::string(CONFIG_VALUE(LATENCY)) + "_" + std::string(ov::num_streams.name()); + const auto tput_name = std::string(CONFIG_VALUE(THROUGHPUT)) + "_" + std::string(ov::num_streams.name()); + hints_props.insert({latency_name, latency_hints.second}); + hints_props.insert({tput_name, tput_hints.second}); + ngraphFunc->set_rt_info(hints_props, "intel_cpu_hints_config"); + + const auto perf_hint_name = getPerfHintName(); + if (perf_hint_name == CONFIG_VALUE(LATENCY)) { + config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = latency_hints.first; + config[ov::num_streams.name()] = latency_hints.second; + } else if (perf_hint_name == CONFIG_VALUE(THROUGHPUT)) { + config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = tput_hints.first; + config[ov::num_streams.name()] = tput_hints.second; /* .second is the ov::util::to_string form, as used by the LATENCY branch and the rt_info entry */ } } @@ -1024,6 +1049,7 @@ Parameter Engine::GetMetric(const std::string& name, const std::map range = std::make_tuple(1, parallel_get_max_threads()); return decltype(ov::range_for_streams)::value_type(range); + } else if (name == ov::caching_properties) { + std::vector cachingProperties; + return decltype(ov::caching_properties)::value_type(cachingProperties); } /* Internally legacy parameters are used with new API as part of migration procedure. 
* This fallback can be removed as soon as migration completed */ @@ -1136,6 +1165,22 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istr Config conf = engConfig; conf.readProperties(config); + // import config props from caching model + auto function = cnnnetwork.getFunction(); + if (function->has_rt_info("intel_cpu_hints_config") && !conf.perfHintsConfig.ovPerfHint.empty()) { + const auto mode_name = conf.perfHintsConfig.ovPerfHint; + if (mode_name == CONFIG_VALUE(LATENCY) || mode_name == CONFIG_VALUE(THROUGHPUT)) { + const auto& hints_config = function->get_rt_info("intel_cpu_hints_config"); + const auto hints_param_name = mode_name + "_" + std::string(ov::num_streams.name()); + const auto it = hints_config.find(hints_param_name); + if (it != hints_config.end()) { + conf.readProperties({{std::string(ov::num_streams.name()), it->second.as()}}); + } else { + IE_THROW() << "Cache file doesn't contain precalculated number of streams for mode " << mode_name; + } + } + } + if (conf.enableDynamicBatch) { conf.batchLimit = static_cast(cnnnetwork.getBatchSize()); } diff --git a/src/plugins/intel_cpu/tests/functional/behavior/export_import.cpp b/src/plugins/intel_cpu/tests/functional/behavior/export_import.cpp new file mode 100644 index 00000000000..bd2cce2a239 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/behavior/export_import.cpp @@ -0,0 +1,65 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/core/any.hpp" +#include "openvino/runtime/core.hpp" +#include "openvino/runtime/compiled_model.hpp" +#include "openvino/runtime/properties.hpp" +#include "common_test_utils/test_common.hpp" +#include "ngraph_functions/builders.hpp" + + +#include +#include + +namespace { + +class ExportImportTest : public CommonTestUtils::TestsCommon {}; + +std::shared_ptr MakeMatMulModel() { + const ov::Shape input_shape = {1, 4096}; + const ov::element::Type precision = ov::element::f32; + + 
auto params = ngraph::builder::makeParams(precision, {input_shape}); + auto matmul_const = ngraph::builder::makeConstant(precision, {4096, 1024}, std::vector{}, true); + auto matmul = ngraph::builder::makeMatMul(params[0], matmul_const); + + auto add_const = ngraph::builder::makeConstant(precision, {1, 1024}, std::vector{}, true); + auto add = ngraph::builder::makeEltwise(matmul, add_const, ngraph::helpers::EltwiseTypes::ADD); + auto softmax = std::make_shared(add); + + ngraph::NodeVector results{softmax}; + return std::make_shared(results, params, "MatMulModel"); +} + +TEST(ExportImportTest, ExportOptimalNumStreams) { + auto original_model = MakeMatMulModel(); + std::string deviceName = "CPU"; + ov::Core core; + auto tput_mode = ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT); + auto latency_mode = ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY); + + auto original_tp_network = core.compile_model(original_model, deviceName, tput_mode); + auto original_latency_network = core.compile_model(original_model, deviceName, latency_mode); + + auto nstreams_tp_original = original_tp_network.get_property(ov::num_streams.name()).as(); + auto nstreams_latency_original = original_latency_network.get_property(ov::num_streams.name()).as(); + + std::stringstream exported_stream; + original_tp_network.export_model(exported_stream); + { + std::stringstream ss(exported_stream.str()); + auto imported_tp_network = core.import_model(ss, deviceName, tput_mode); + auto nstreams_tp_imported = imported_tp_network.get_property(ov::num_streams.name()).as(); + EXPECT_EQ(nstreams_tp_original, nstreams_tp_imported); + } + + { + std::stringstream ss(exported_stream.str()); + auto imported_latency_network = core.import_model(ss, deviceName, latency_mode); + auto nstreams_latency_imported = imported_latency_network.get_property(ov::num_streams.name()).as(); + EXPECT_EQ(nstreams_latency_original, nstreams_latency_imported); + } +} +} // namespace diff --git 
a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 98bb6c6f6b0..7645dfdafab 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -164,6 +164,9 @@ std::vector disabledTestPatterns() { // is shared across plugins // passed local test and cpu has specific test cases with nms9 to cover R"(smoke_NmsLayerTest.*)", + // Issue: 95239 + // HETERO plugin lacks caching_properties definition + R"(smoke_Hetero_CachingSupportCase.*)", // 94982. FP32->I32 conversion issue in the reference implementation. There can be some garbage in the rest of float values like 0.333333745. // The kernel does not have such garbage. The diff 0.000000745 is taken into account in calculations and affects further type conversion. // Reorder->GridSample->Reorder also does not work here. Potential fix is to use nearest conversion instead of truncation. 
diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp index 0cb0a7182af..ac583c01f3d 100644 --- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -86,6 +86,8 @@ std::vector disabledTestPatterns() { // TODO: Issue: 71068 R"(.*OVInferRequestCancellationTests.*)", // TODO: Issue: 71070 - R"(.*OVInferenceChaining.*(StaticOutputToStaticInput).*)" + R"(.*OVInferenceChaining.*(StaticOutputToStaticInput).*)", + // TODO: Issue: 95234 + R"(.*smoke_CachingSupportCase_GNA.*)" }; } diff --git a/src/tests/functional/inference_engine/caching_test.cpp b/src/tests/functional/inference_engine/caching_test.cpp index 3c62e19d08d..af7f43f6ce9 100644 --- a/src/tests/functional/inference_engine/caching_test.cpp +++ b/src/tests/functional/inference_engine/caching_test.cpp @@ -35,6 +35,8 @@ #include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp" #include "cpp/ie_plugin.hpp" +#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" + using namespace InferenceEngine; using namespace ::testing; using namespace InferenceEngine::details; @@ -652,10 +654,26 @@ TEST_P(CachingTest, TestChangeLoadConfig) { EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(SUPPORTED_METRICS), _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(IMPORT_EXPORT_SUPPORT), _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(DEVICE_ARCHITECTURE), _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, GetMetric(ov::caching_properties.name(), _)).Times(AnyNumber()); + ON_CALL(*mockPlugin, GetMetric(ov::supported_properties.name(), _)). 
+ WillByDefault(Invoke([&](const std::string &, const std::map &) { + return std::vector{ + ov::supported_properties.name(), + METRIC_KEY(IMPORT_EXPORT_SUPPORT), + ov::device::capabilities.name(), + ov::device::architecture.name(), + ov::caching_properties.name()}; + })); + ON_CALL(*mockPlugin, GetMetric(ov::caching_properties.name(), _)). + WillByDefault(Invoke([&](const std::string &, const std::map &) { + std::vector res; + res.push_back(ov::PropertyName(CUSTOM_KEY, ov::PropertyMutability::RO)); + return decltype(ov::caching_properties)::value_type(res); + })); ON_CALL(*mockPlugin, GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS), _)). WillByDefault(Invoke([&](const std::string &, const std::map &) { std::vector res; - res.push_back(CUSTOM_KEY); + res.push_back(ov::caching_properties.name()); return res; })); { diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp index 682aa58cb54..e6a3576e521 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp @@ -13,6 +13,7 @@ #include "ngraph_functions/builders.hpp" #include "ngraph_functions/subgraph_builders.hpp" +#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #define GTEST_COUT std::cout << "[ ] [ INFO ] " @@ -184,6 +185,9 @@ void CompileModelCacheTestBase::run() { GTEST_COUT << "Plugin doesn't support import and export - skipping test" << std::endl; GTEST_SKIP(); } + if (importExportSupported(*core)) { + ASSERT_NO_THROW(core->get_property(targetDevice, ov::caching_properties)); + } configure_model(); try { compiledModel = core->compile_model(function, targetDevice, configuration);