[CPU] ModelCaching: added plugin specific config properties serialization (#13593)

This commit is contained in:
Vladislav Golubev
2022-11-11 18:27:22 +01:00
committed by GitHub
parent cb067de597
commit cec772f2c0
9 changed files with 193 additions and 35 deletions

View File

@@ -107,3 +107,14 @@ DECLARE_CONFIG_KEY(CONFIG_DEVICE_ID);
} // namespace PluginConfigInternalParams
} // namespace InferenceEngine
namespace ov {
/**
* @brief Read-only property to get a std::vector<PropertyName> of properties
* which should affect the hash calculation for model cache
* @ingroup ie_dev_api_plugin_api
*/
static constexpr Property<std::vector<PropertyName>, PropertyMutability::RO> caching_properties{"CACHING_PROPERTIES"};
} // namespace ov

View File

@@ -490,26 +490,24 @@ class CoreImpl : public ie::ICore, public std::enable_shared_from_this<ie::ICore
const std::string& deviceFamily,
const std::map<std::string, std::string>& origConfig) const {
std::map<std::string, Any> getMetricConfig;
auto compileConfig = origConfig;
std::map<std::string, std::string> compileConfig;
// 0. Remove TARGET_FALLBACK key, move it to getMetricConfig
auto targetFallbackIt = compileConfig.find("TARGET_FALLBACK");
if (targetFallbackIt == compileConfig.end()) {
targetFallbackIt = compileConfig.find(ov::device::priorities.name());
// 0. Move TARGET_FALLBACK key to getMetricConfig
auto targetFallbackIt = origConfig.find("TARGET_FALLBACK");
if (targetFallbackIt == origConfig.end()) {
targetFallbackIt = origConfig.find(ov::device::priorities.name());
}
if (targetFallbackIt != compileConfig.end()) {
if (targetFallbackIt != origConfig.end()) {
getMetricConfig[targetFallbackIt->first] = targetFallbackIt->second;
compileConfig.erase(targetFallbackIt);
}
// 1. remove DEVICE_ID key
auto deviceIt = compileConfig.find(ov::device::id.name());
if (deviceIt != compileConfig.end()) {
// 1. Move DEVICE_ID key to getMetricConfig
auto deviceIt = origConfig.find(ov::device::id.name());
if (deviceIt != origConfig.end()) {
getMetricConfig[deviceIt->first] = deviceIt->second;
compileConfig.erase(deviceIt);
}
// 2. replace it with DEVICE_ARCHITECTURE value
// 2. Replace it with DEVICE_ARCHITECTURE value
if (DeviceSupportsConfigKey(plugin, ov::device::architecture.name())) {
compileConfig[ov::device::architecture.name()] =
plugin.get_property(ov::device::architecture, getMetricConfig);
@@ -517,6 +515,17 @@ class CoreImpl : public ie::ICore, public std::enable_shared_from_this<ie::ICore
// Take device name if device does not support DEVICE_ARCHITECTURE metric
compileConfig[ov::device::architecture.name()] = deviceFamily;
}
// 3. Extract config keys which affect compile config
if (DeviceSupportsConfigKey(plugin, ov::caching_properties.name())) {
auto cachingProps = plugin.get_property(ov::caching_properties);
for (const auto& prop : cachingProps) {
// origConfig values have higher priority than plugin parameters
auto it = origConfig.find(prop);
compileConfig[prop] =
it == origConfig.end() ? plugin.get_property(prop, {}).as<std::string>() : it->second;
}
}
return compileConfig;
}

View File

@@ -53,6 +53,7 @@ struct Config {
void readProperties(const std::map<std::string, std::string> &config);
void updateProperties();
std::map<std::string, std::string> _config;
#ifdef CPU_DEBUG_CAPS

View File

@@ -703,26 +703,11 @@ static bool streamsSet(const std::map<std::string, std::string>& config) {
}
void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ngraph::Function>& ngraphFunc) const {
const bool streamsExplicitlySetForModel = streamsSet(config);
// checking streams (to avoid overriding what user might explicitly set in the incoming config or previously via SetConfig)
if (streamsExplicitlySetForModel ||
streamsExplicitlySetForEngine)
return;
auto getNumStreamsLatency = [&]() {
return std::pair<std::string, std::string>(CONFIG_VALUE(CPU_THROUGHPUT_NUMA), ov::util::to_string(ov::streams::NUMA));
};
const auto& mode = config.find(CONFIG_KEY(PERFORMANCE_HINT));
// the mode may have just arrived to the LoadNetwork, or was set with the plugin's SetConfig
if (mode == config.end() && engConfig.perfHintsConfig.ovPerfHint.empty())
return;
/* performance hints set for network have higher priority than engine ones.
* This applies for all the configuration parameters */
const auto mode_name = (mode != config.end()) ?
PerfHintsConfig::CheckPerformanceHintValue(mode->second) :
engConfig.perfHintsConfig.ovPerfHint;
if (mode_name == CONFIG_VALUE(LATENCY)) {
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = CONFIG_VALUE(CPU_THROUGHPUT_NUMA);
config[ov::num_streams.name()] = ov::util::to_string(ov::streams::NUMA);
} else if (mode_name == CONFIG_VALUE(THROUGHPUT)) {
auto getNumStreamsThroughput = [&]() {
const auto isa = dnnl::get_effective_cpu_isa();
float isaSpecificThreshold = 1.0f;
switch (isa) {
@@ -797,8 +782,48 @@ void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, c
num_streams = std::min(num_streams,
engConfig.perfHintsConfig.ovPerfHintNumRequests);
}
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = std::to_string(num_streams);
config[ov::num_streams.name()] = ov::util::to_string(num_streams);
return std::pair<std::string, std::string>(std::to_string(num_streams), ov::util::to_string(num_streams));
};
auto getPerfHintName = [&]() {
const bool streamsExplicitlySetForModel = streamsSet(config);
// checking streams (to avoid overriding what user might explicitly set in the incoming config or previously via SetConfig)
if (streamsExplicitlySetForModel ||
streamsExplicitlySetForEngine)
return std::string();
const auto& perf_hint = config.find(CONFIG_KEY(PERFORMANCE_HINT));
// the perf_hint may have just arrived to the LoadNetwork, or was set with the plugin's SetConfig
if (perf_hint == config.end() && engConfig.perfHintsConfig.ovPerfHint.empty())
return std::string();
/* performance hints set for network have higher priority than engine ones.
* This applies for all the configuration parameters */
const auto perf_hint_name = (perf_hint != config.end()) ?
PerfHintsConfig::CheckPerformanceHintValue(perf_hint->second) :
engConfig.perfHintsConfig.ovPerfHint;
return perf_hint_name;
};
// We compute both hints values because the optimal number of streams are computed based on ov::Model
// while we export model in cpu internal opset so we need to save precomputed optimal # streams for both hint modes
const auto latency_hints = getNumStreamsLatency();
const auto tput_hints = getNumStreamsThroughput();
// save hints parameters to model rt_info
ov::AnyMap hints_props;
const auto latency_name = std::string(CONFIG_VALUE(LATENCY)) + "_" + std::string(ov::num_streams.name());
const auto tput_name = std::string(CONFIG_VALUE(THROUGHPUT)) + "_" + std::string(ov::num_streams.name());
hints_props.insert({latency_name, latency_hints.second});
hints_props.insert({tput_name, tput_hints.second});
ngraphFunc->set_rt_info(hints_props, "intel_cpu_hints_config");
const auto perf_hint_name = getPerfHintName();
if (perf_hint_name == CONFIG_VALUE(LATENCY)) {
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = latency_hints.first;
config[ov::num_streams.name()] = latency_hints.second;
} else if (perf_hint_name == CONFIG_VALUE(THROUGHPUT)) {
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = tput_hints.first;
config[ov::num_streams.name()] = tput_hints.first;
}
}
@@ -1024,6 +1049,7 @@ Parameter Engine::GetMetric(const std::string& name, const std::map<std::string,
RO_property(ov::range_for_streams.name()),
RO_property(ov::device::full_name.name()),
RO_property(ov::device::capabilities.name()),
RO_property(ov::caching_properties.name()),
RO_property(ov::cache_dir.name()) // WA Can be removed after implementing snippet serialization.
};
// the whole config is RW before network is loaded.
@@ -1065,6 +1091,9 @@ Parameter Engine::GetMetric(const std::string& name, const std::map<std::string,
} else if (name == ov::range_for_streams) {
const std::tuple<unsigned int, unsigned int> range = std::make_tuple(1, parallel_get_max_threads());
return decltype(ov::range_for_streams)::value_type(range);
} else if (name == ov::caching_properties) {
std::vector<ov::PropertyName> cachingProperties;
return decltype(ov::caching_properties)::value_type(cachingProperties);
}
/* Internally legacy parameters are used with new API as part of migration procedure.
* This fallback can be removed as soon as migration completed */
@@ -1136,6 +1165,22 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istr
Config conf = engConfig;
conf.readProperties(config);
// import config props from caching model
auto function = cnnnetwork.getFunction();
if (function->has_rt_info("intel_cpu_hints_config") && !conf.perfHintsConfig.ovPerfHint.empty()) {
const auto mode_name = conf.perfHintsConfig.ovPerfHint;
if (mode_name == CONFIG_VALUE(LATENCY) || mode_name == CONFIG_VALUE(THROUGHPUT)) {
const auto& hints_config = function->get_rt_info<ov::AnyMap>("intel_cpu_hints_config");
const auto hints_param_name = mode_name + "_" + std::string(ov::num_streams.name());
const auto it = hints_config.find(hints_param_name);
if (it != hints_config.end()) {
conf.readProperties({{std::string(ov::num_streams.name()), it->second.as<std::string>()}});
} else {
IE_THROW() << "Cache file doesn't contain precalculated number of streams for mode " << mode_name;
}
}
}
if (conf.enableDynamicBatch) {
conf.batchLimit = static_cast<int>(cnnnetwork.getBatchSize());
}

View File

@@ -0,0 +1,65 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/core/any.hpp"
#include "openvino/runtime/core.hpp"
#include "openvino/runtime/compiled_model.hpp"
#include "openvino/runtime/properties.hpp"
#include "common_test_utils/test_common.hpp"
#include "ngraph_functions/builders.hpp"
#include <openvino/opsets/opset9.hpp>
#include <ie/ie_core.hpp>
namespace {
class ExportImportTest : public CommonTestUtils::TestsCommon {};
std::shared_ptr<ov::Model> MakeMatMulModel() {
const ov::Shape input_shape = {1, 4096};
const ov::element::Type precision = ov::element::f32;
auto params = ngraph::builder::makeParams(precision, {input_shape});
auto matmul_const = ngraph::builder::makeConstant(precision, {4096, 1024}, std::vector<float>{}, true);
auto matmul = ngraph::builder::makeMatMul(params[0], matmul_const);
auto add_const = ngraph::builder::makeConstant(precision, {1, 1024}, std::vector<float>{}, true);
auto add = ngraph::builder::makeEltwise(matmul, add_const, ngraph::helpers::EltwiseTypes::ADD);
auto softmax = std::make_shared<ov::opset9::Softmax>(add);
ngraph::NodeVector results{softmax};
return std::make_shared<ov::Model>(results, params, "MatMulModel");
}
// Verifies that the optimal number of streams computed at compile time survives
// an export/import round trip for both THROUGHPUT and LATENCY performance hints.
TEST(ExportImportTest, ExportOptimalNumStreams) {
    const std::string device = "CPU";
    ov::Core core;
    auto model = MakeMatMulModel();

    const auto tput_hint = ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT);
    const auto latency_hint = ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY);

    auto tput_compiled = core.compile_model(model, device, tput_hint);
    auto latency_compiled = core.compile_model(model, device, latency_hint);
    const auto tput_streams_ref = tput_compiled.get_property(ov::num_streams.name()).as<std::string>();
    const auto latency_streams_ref = latency_compiled.get_property(ov::num_streams.name()).as<std::string>();

    // Export once (from the THROUGHPUT-compiled model) and import under each hint.
    std::stringstream blob;
    tput_compiled.export_model(blob);

    auto import_and_query_streams = [&](const auto& hint) {
        std::stringstream ss(blob.str());
        auto imported = core.import_model(ss, device, hint);
        return imported.get_property(ov::num_streams.name()).as<std::string>();
    };

    EXPECT_EQ(tput_streams_ref, import_and_query_streams(tput_hint));
    EXPECT_EQ(latency_streams_ref, import_and_query_streams(latency_hint));
}
} // namespace

View File

@@ -164,6 +164,9 @@ std::vector<std::string> disabledTestPatterns() {
// is shared across plugins
// passed local test and cpu has specific test cases with nms9 to cover
R"(smoke_NmsLayerTest.*)",
// Issue: 95239
// HETERO plugin lacks caching_properties definition
R"(smoke_Hetero_CachingSupportCase.*)",
// 94982. FP32->I32 conversion issue in the reference implementation. There can be some garbage in the rest of float values like 0.333333745.
// The kernel does not have such garbage. The diff 0.000000745 is taken into account in calculations and affects further type conversion.
// Reorder->GridSample->Reorder also does not work here. Potential fix is to use nearest conversion instead of truncation.

View File

@@ -86,6 +86,8 @@ std::vector<std::string> disabledTestPatterns() {
// TODO: Issue: 71068
R"(.*OVInferRequestCancellationTests.*)",
// TODO: Issue: 71070
R"(.*OVInferenceChaining.*(StaticOutputToStaticInput).*)"
R"(.*OVInferenceChaining.*(StaticOutputToStaticInput).*)",
// TODO: Issue: 95234
R"(.*smoke_CachingSupportCase_GNA.*)"
};
}

View File

@@ -35,6 +35,8 @@
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp"
#include "cpp/ie_plugin.hpp"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
using namespace InferenceEngine;
using namespace ::testing;
using namespace InferenceEngine::details;
@@ -652,10 +654,26 @@ TEST_P(CachingTest, TestChangeLoadConfig) {
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(SUPPORTED_METRICS), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(IMPORT_EXPORT_SUPPORT), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(DEVICE_ARCHITECTURE), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, GetMetric(ov::caching_properties.name(), _)).Times(AnyNumber());
ON_CALL(*mockPlugin, GetMetric(ov::supported_properties.name(), _)).
WillByDefault(Invoke([&](const std::string &, const std::map<std::string, Parameter> &) {
return std::vector<ov::PropertyName>{
ov::supported_properties.name(),
METRIC_KEY(IMPORT_EXPORT_SUPPORT),
ov::device::capabilities.name(),
ov::device::architecture.name(),
ov::caching_properties.name()};
}));
ON_CALL(*mockPlugin, GetMetric(ov::caching_properties.name(), _)).
WillByDefault(Invoke([&](const std::string &, const std::map<std::string, Parameter> &) {
std::vector<ov::PropertyName> res;
res.push_back(ov::PropertyName(CUSTOM_KEY, ov::PropertyMutability::RO));
return decltype(ov::caching_properties)::value_type(res);
}));
ON_CALL(*mockPlugin, GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS), _)).
WillByDefault(Invoke([&](const std::string &, const std::map<std::string, Parameter> &) {
std::vector<std::string> res;
res.push_back(CUSTOM_KEY);
res.push_back(ov::caching_properties.name());
return res;
}));
{

View File

@@ -13,6 +13,7 @@
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/subgraph_builders.hpp"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#define GTEST_COUT std::cout << "[ ] [ INFO ] "
@@ -184,6 +185,9 @@ void CompileModelCacheTestBase::run() {
GTEST_COUT << "Plugin doesn't support import and export - skipping test" << std::endl;
GTEST_SKIP();
}
if (importExportSupported(*core)) {
ASSERT_NO_THROW(core->get_property(targetDevice, ov::caching_properties));
}
configure_model();
try {
compiledModel = core->compile_model(function, targetDevice, configuration);