[CPU] ModelCaching: added plugin specific config properties serialization (#13593)
This commit is contained in:
committed by
GitHub
parent
cb067de597
commit
cec772f2c0
@@ -107,3 +107,14 @@ DECLARE_CONFIG_KEY(CONFIG_DEVICE_ID);
|
||||
} // namespace PluginConfigInternalParams
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
||||
namespace ov {
|
||||
|
||||
/**
|
||||
* @brief Read-only property to get a std::vector<PropertyName> of properties
|
||||
* which should affect the hash calculation for model cache
|
||||
* @ingroup ie_dev_api_plugin_api
|
||||
*/
|
||||
static constexpr Property<std::vector<PropertyName>, PropertyMutability::RO> caching_properties{"CACHING_PROPERTIES"};
|
||||
|
||||
} // namespace ov
|
||||
|
||||
@@ -490,26 +490,24 @@ class CoreImpl : public ie::ICore, public std::enable_shared_from_this<ie::ICore
|
||||
const std::string& deviceFamily,
|
||||
const std::map<std::string, std::string>& origConfig) const {
|
||||
std::map<std::string, Any> getMetricConfig;
|
||||
auto compileConfig = origConfig;
|
||||
std::map<std::string, std::string> compileConfig;
|
||||
|
||||
// 0. Remove TARGET_FALLBACK key, move it to getMetricConfig
|
||||
auto targetFallbackIt = compileConfig.find("TARGET_FALLBACK");
|
||||
if (targetFallbackIt == compileConfig.end()) {
|
||||
targetFallbackIt = compileConfig.find(ov::device::priorities.name());
|
||||
// 0. Move TARGET_FALLBACK key to getMetricConfig
|
||||
auto targetFallbackIt = origConfig.find("TARGET_FALLBACK");
|
||||
if (targetFallbackIt == origConfig.end()) {
|
||||
targetFallbackIt = origConfig.find(ov::device::priorities.name());
|
||||
}
|
||||
if (targetFallbackIt != compileConfig.end()) {
|
||||
if (targetFallbackIt != origConfig.end()) {
|
||||
getMetricConfig[targetFallbackIt->first] = targetFallbackIt->second;
|
||||
compileConfig.erase(targetFallbackIt);
|
||||
}
|
||||
|
||||
// 1. remove DEVICE_ID key
|
||||
auto deviceIt = compileConfig.find(ov::device::id.name());
|
||||
if (deviceIt != compileConfig.end()) {
|
||||
// 1. Move DEVICE_ID key to getMetricConfig
|
||||
auto deviceIt = origConfig.find(ov::device::id.name());
|
||||
if (deviceIt != origConfig.end()) {
|
||||
getMetricConfig[deviceIt->first] = deviceIt->second;
|
||||
compileConfig.erase(deviceIt);
|
||||
}
|
||||
|
||||
// 2. replace it with DEVICE_ARCHITECTURE value
|
||||
// 2. Replace it with DEVICE_ARCHITECTURE value
|
||||
if (DeviceSupportsConfigKey(plugin, ov::device::architecture.name())) {
|
||||
compileConfig[ov::device::architecture.name()] =
|
||||
plugin.get_property(ov::device::architecture, getMetricConfig);
|
||||
@@ -517,6 +515,17 @@ class CoreImpl : public ie::ICore, public std::enable_shared_from_this<ie::ICore
|
||||
// Take device name if device does not support DEVICE_ARCHITECTURE metric
|
||||
compileConfig[ov::device::architecture.name()] = deviceFamily;
|
||||
}
|
||||
|
||||
// 3. Extract config keys which affect compile config
|
||||
if (DeviceSupportsConfigKey(plugin, ov::caching_properties.name())) {
|
||||
auto cachingProps = plugin.get_property(ov::caching_properties);
|
||||
for (const auto& prop : cachingProps) {
|
||||
// origConfig values have higher priority than plugin parameters
|
||||
auto it = origConfig.find(prop);
|
||||
compileConfig[prop] =
|
||||
it == origConfig.end() ? plugin.get_property(prop, {}).as<std::string>() : it->second;
|
||||
}
|
||||
}
|
||||
return compileConfig;
|
||||
}
|
||||
|
||||
|
||||
@@ -53,6 +53,7 @@ struct Config {
|
||||
|
||||
void readProperties(const std::map<std::string, std::string> &config);
|
||||
void updateProperties();
|
||||
|
||||
std::map<std::string, std::string> _config;
|
||||
|
||||
#ifdef CPU_DEBUG_CAPS
|
||||
|
||||
@@ -703,26 +703,11 @@ static bool streamsSet(const std::map<std::string, std::string>& config) {
|
||||
}
|
||||
|
||||
void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ngraph::Function>& ngraphFunc) const {
|
||||
const bool streamsExplicitlySetForModel = streamsSet(config);
|
||||
// checking streams (to avoid overriding what user might explicitly set in the incoming config or previously via SetConfig)
|
||||
if (streamsExplicitlySetForModel ||
|
||||
streamsExplicitlySetForEngine)
|
||||
return;
|
||||
auto getNumStreamsLatency = [&]() {
|
||||
return std::pair<std::string, std::string>(CONFIG_VALUE(CPU_THROUGHPUT_NUMA), ov::util::to_string(ov::streams::NUMA));
|
||||
};
|
||||
|
||||
const auto& mode = config.find(CONFIG_KEY(PERFORMANCE_HINT));
|
||||
// the mode may have just arrived to the LoadNetwork, or was set with the plugin's SetConfig
|
||||
if (mode == config.end() && engConfig.perfHintsConfig.ovPerfHint.empty())
|
||||
return;
|
||||
/* performance hints set for network have higher priority than engine ones.
|
||||
* This applies for all the configuration parameters */
|
||||
const auto mode_name = (mode != config.end()) ?
|
||||
PerfHintsConfig::CheckPerformanceHintValue(mode->second) :
|
||||
engConfig.perfHintsConfig.ovPerfHint;
|
||||
|
||||
if (mode_name == CONFIG_VALUE(LATENCY)) {
|
||||
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = CONFIG_VALUE(CPU_THROUGHPUT_NUMA);
|
||||
config[ov::num_streams.name()] = ov::util::to_string(ov::streams::NUMA);
|
||||
} else if (mode_name == CONFIG_VALUE(THROUGHPUT)) {
|
||||
auto getNumStreamsThroughput = [&]() {
|
||||
const auto isa = dnnl::get_effective_cpu_isa();
|
||||
float isaSpecificThreshold = 1.0f;
|
||||
switch (isa) {
|
||||
@@ -797,8 +782,48 @@ void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, c
|
||||
num_streams = std::min(num_streams,
|
||||
engConfig.perfHintsConfig.ovPerfHintNumRequests);
|
||||
}
|
||||
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = std::to_string(num_streams);
|
||||
config[ov::num_streams.name()] = ov::util::to_string(num_streams);
|
||||
return std::pair<std::string, std::string>(std::to_string(num_streams), ov::util::to_string(num_streams));
|
||||
};
|
||||
|
||||
auto getPerfHintName = [&]() {
|
||||
const bool streamsExplicitlySetForModel = streamsSet(config);
|
||||
// checking streams (to avoid overriding what user might explicitly set in the incoming config or previously via SetConfig)
|
||||
if (streamsExplicitlySetForModel ||
|
||||
streamsExplicitlySetForEngine)
|
||||
return std::string();
|
||||
|
||||
const auto& perf_hint = config.find(CONFIG_KEY(PERFORMANCE_HINT));
|
||||
// the perf_hint may have just arrived to the LoadNetwork, or was set with the plugin's SetConfig
|
||||
if (perf_hint == config.end() && engConfig.perfHintsConfig.ovPerfHint.empty())
|
||||
return std::string();
|
||||
/* performance hints set for network have higher priority than engine ones.
|
||||
* This applies for all the configuration parameters */
|
||||
const auto perf_hint_name = (perf_hint != config.end()) ?
|
||||
PerfHintsConfig::CheckPerformanceHintValue(perf_hint->second) :
|
||||
engConfig.perfHintsConfig.ovPerfHint;
|
||||
return perf_hint_name;
|
||||
};
|
||||
|
||||
// We compute both hints values because the optimal number of streams are computed based on ov::Model
|
||||
// while we export model in cpu internal opset so we need to save precomputed optimal # streams for both hint modes
|
||||
const auto latency_hints = getNumStreamsLatency();
|
||||
const auto tput_hints = getNumStreamsThroughput();
|
||||
|
||||
// save hints parameters to model rt_info
|
||||
ov::AnyMap hints_props;
|
||||
const auto latency_name = std::string(CONFIG_VALUE(LATENCY)) + "_" + std::string(ov::num_streams.name());
|
||||
const auto tput_name = std::string(CONFIG_VALUE(THROUGHPUT)) + "_" + std::string(ov::num_streams.name());
|
||||
hints_props.insert({latency_name, latency_hints.second});
|
||||
hints_props.insert({tput_name, tput_hints.second});
|
||||
ngraphFunc->set_rt_info(hints_props, "intel_cpu_hints_config");
|
||||
|
||||
const auto perf_hint_name = getPerfHintName();
|
||||
if (perf_hint_name == CONFIG_VALUE(LATENCY)) {
|
||||
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = latency_hints.first;
|
||||
config[ov::num_streams.name()] = latency_hints.second;
|
||||
} else if (perf_hint_name == CONFIG_VALUE(THROUGHPUT)) {
|
||||
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = tput_hints.first;
|
||||
config[ov::num_streams.name()] = tput_hints.first;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1024,6 +1049,7 @@ Parameter Engine::GetMetric(const std::string& name, const std::map<std::string,
|
||||
RO_property(ov::range_for_streams.name()),
|
||||
RO_property(ov::device::full_name.name()),
|
||||
RO_property(ov::device::capabilities.name()),
|
||||
RO_property(ov::caching_properties.name()),
|
||||
RO_property(ov::cache_dir.name()) // WA Can be removed after implementing snippet serialization.
|
||||
};
|
||||
// the whole config is RW before network is loaded.
|
||||
@@ -1065,6 +1091,9 @@ Parameter Engine::GetMetric(const std::string& name, const std::map<std::string,
|
||||
} else if (name == ov::range_for_streams) {
|
||||
const std::tuple<unsigned int, unsigned int> range = std::make_tuple(1, parallel_get_max_threads());
|
||||
return decltype(ov::range_for_streams)::value_type(range);
|
||||
} else if (name == ov::caching_properties) {
|
||||
std::vector<ov::PropertyName> cachingProperties;
|
||||
return decltype(ov::caching_properties)::value_type(cachingProperties);
|
||||
}
|
||||
/* Internally legacy parameters are used with new API as part of migration procedure.
|
||||
* This fallback can be removed as soon as migration completed */
|
||||
@@ -1136,6 +1165,22 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istr
|
||||
Config conf = engConfig;
|
||||
conf.readProperties(config);
|
||||
|
||||
// import config props from caching model
|
||||
auto function = cnnnetwork.getFunction();
|
||||
if (function->has_rt_info("intel_cpu_hints_config") && !conf.perfHintsConfig.ovPerfHint.empty()) {
|
||||
const auto mode_name = conf.perfHintsConfig.ovPerfHint;
|
||||
if (mode_name == CONFIG_VALUE(LATENCY) || mode_name == CONFIG_VALUE(THROUGHPUT)) {
|
||||
const auto& hints_config = function->get_rt_info<ov::AnyMap>("intel_cpu_hints_config");
|
||||
const auto hints_param_name = mode_name + "_" + std::string(ov::num_streams.name());
|
||||
const auto it = hints_config.find(hints_param_name);
|
||||
if (it != hints_config.end()) {
|
||||
conf.readProperties({{std::string(ov::num_streams.name()), it->second.as<std::string>()}});
|
||||
} else {
|
||||
IE_THROW() << "Cache file doesn't contain precalculated number of streams for mode " << mode_name;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (conf.enableDynamicBatch) {
|
||||
conf.batchLimit = static_cast<int>(cnnnetwork.getBatchSize());
|
||||
}
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "openvino/core/any.hpp"
|
||||
#include "openvino/runtime/core.hpp"
|
||||
#include "openvino/runtime/compiled_model.hpp"
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
#include "common_test_utils/test_common.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
|
||||
#include <openvino/opsets/opset9.hpp>
|
||||
#include <ie/ie_core.hpp>
|
||||
|
||||
namespace {
|
||||
|
||||
// NOTE(review): declared as a gtest fixture, but the test below uses TEST (not
// TEST_F), so this class is currently unused — confirm whether TEST_F was intended.
class ExportImportTest : public CommonTestUtils::TestsCommon {};
|
||||
|
||||
std::shared_ptr<ov::Model> MakeMatMulModel() {
|
||||
const ov::Shape input_shape = {1, 4096};
|
||||
const ov::element::Type precision = ov::element::f32;
|
||||
|
||||
auto params = ngraph::builder::makeParams(precision, {input_shape});
|
||||
auto matmul_const = ngraph::builder::makeConstant(precision, {4096, 1024}, std::vector<float>{}, true);
|
||||
auto matmul = ngraph::builder::makeMatMul(params[0], matmul_const);
|
||||
|
||||
auto add_const = ngraph::builder::makeConstant(precision, {1, 1024}, std::vector<float>{}, true);
|
||||
auto add = ngraph::builder::makeEltwise(matmul, add_const, ngraph::helpers::EltwiseTypes::ADD);
|
||||
auto softmax = std::make_shared<ov::opset9::Softmax>(add);
|
||||
|
||||
ngraph::NodeVector results{softmax};
|
||||
return std::make_shared<ov::Model>(results, params, "MatMulModel");
|
||||
}
|
||||
|
||||
// Verifies that the precomputed optimal number of streams survives an
// export_model / import_model round trip: a model imported under a given
// performance hint must report the same ov::num_streams value as the model
// originally compiled under that hint.
TEST(ExportImportTest, ExportOptimalNumStreams) {
    const std::string device = "CPU";
    ov::Core core;
    auto model = MakeMatMulModel();

    const auto tput_hint = ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT);
    const auto latency_hint = ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY);

    // Reads the number-of-streams property of a compiled model as a string.
    auto num_streams_of = [](const auto& network) {
        return network.get_property(ov::num_streams.name()).template as<std::string>();
    };

    auto compiled_tput = core.compile_model(model, device, tput_hint);
    auto compiled_latency = core.compile_model(model, device, latency_hint);

    const auto streams_tput = num_streams_of(compiled_tput);
    const auto streams_latency = num_streams_of(compiled_latency);

    // Export once (from the THROUGHPUT-compiled model) and re-import under each hint.
    std::stringstream blob;
    compiled_tput.export_model(blob);

    {
        std::stringstream input(blob.str());
        auto imported = core.import_model(input, device, tput_hint);
        EXPECT_EQ(streams_tput, num_streams_of(imported));
    }

    {
        std::stringstream input(blob.str());
        auto imported = core.import_model(input, device, latency_hint);
        EXPECT_EQ(streams_latency, num_streams_of(imported));
    }
}
|
||||
} // namespace
|
||||
@@ -164,6 +164,9 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
// is shared across plugins
|
||||
// passed local test and cpu has specific test cases with nms9 to cover
|
||||
R"(smoke_NmsLayerTest.*)",
|
||||
// Issue: 95239
|
||||
// HETERO plugin lacks caching_properties definition
|
||||
R"(smoke_Hetero_CachingSupportCase.*)",
|
||||
// 94982. FP32->I32 conversion issue in the reference implementation. There can be some garbage in the rest of float values like 0.333333745.
|
||||
// The kernel does not have such garbage. The diff 0.000000745 is taken into account in calculations and affects further type conversion.
|
||||
// Reorder->GridSample->Reorder also does not work here. Potential fix is to use nearest conversion instead of truncation.
|
||||
|
||||
@@ -86,6 +86,8 @@ std::vector<std::string> disabledTestPatterns() {
|
||||
// TODO: Issue: 71068
|
||||
R"(.*OVInferRequestCancellationTests.*)",
|
||||
// TODO: Issue: 71070
|
||||
R"(.*OVInferenceChaining.*(StaticOutputToStaticInput).*)"
|
||||
R"(.*OVInferenceChaining.*(StaticOutputToStaticInput).*)",
|
||||
// TODO: Issue: 95234
|
||||
R"(.*smoke_CachingSupportCase_GNA.*)"
|
||||
};
|
||||
}
|
||||
|
||||
@@ -35,6 +35,8 @@
|
||||
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp"
|
||||
#include "cpp/ie_plugin.hpp"
|
||||
|
||||
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace ::testing;
|
||||
using namespace InferenceEngine::details;
|
||||
@@ -652,10 +654,26 @@ TEST_P(CachingTest, TestChangeLoadConfig) {
|
||||
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(SUPPORTED_METRICS), _)).Times(AnyNumber());
|
||||
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(IMPORT_EXPORT_SUPPORT), _)).Times(AnyNumber());
|
||||
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(DEVICE_ARCHITECTURE), _)).Times(AnyNumber());
|
||||
EXPECT_CALL(*mockPlugin, GetMetric(ov::caching_properties.name(), _)).Times(AnyNumber());
|
||||
ON_CALL(*mockPlugin, GetMetric(ov::supported_properties.name(), _)).
|
||||
WillByDefault(Invoke([&](const std::string &, const std::map<std::string, Parameter> &) {
|
||||
return std::vector<ov::PropertyName>{
|
||||
ov::supported_properties.name(),
|
||||
METRIC_KEY(IMPORT_EXPORT_SUPPORT),
|
||||
ov::device::capabilities.name(),
|
||||
ov::device::architecture.name(),
|
||||
ov::caching_properties.name()};
|
||||
}));
|
||||
ON_CALL(*mockPlugin, GetMetric(ov::caching_properties.name(), _)).
|
||||
WillByDefault(Invoke([&](const std::string &, const std::map<std::string, Parameter> &) {
|
||||
std::vector<ov::PropertyName> res;
|
||||
res.push_back(ov::PropertyName(CUSTOM_KEY, ov::PropertyMutability::RO));
|
||||
return decltype(ov::caching_properties)::value_type(res);
|
||||
}));
|
||||
ON_CALL(*mockPlugin, GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS), _)).
|
||||
WillByDefault(Invoke([&](const std::string &, const std::map<std::string, Parameter> &) {
|
||||
std::vector<std::string> res;
|
||||
res.push_back(CUSTOM_KEY);
|
||||
res.push_back(ov::caching_properties.name());
|
||||
return res;
|
||||
}));
|
||||
{
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "ngraph_functions/subgraph_builders.hpp"
|
||||
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
|
||||
|
||||
#define GTEST_COUT std::cout << "[ ] [ INFO ] "
|
||||
|
||||
@@ -184,6 +185,9 @@ void CompileModelCacheTestBase::run() {
|
||||
GTEST_COUT << "Plugin doesn't support import and export - skipping test" << std::endl;
|
||||
GTEST_SKIP();
|
||||
}
|
||||
if (importExportSupported(*core)) {
|
||||
ASSERT_NO_THROW(core->get_property(targetDevice, ov::caching_properties));
|
||||
}
|
||||
configure_model();
|
||||
try {
|
||||
compiledModel = core->compile_model(function, targetDevice, configuration);
|
||||
|
||||
Reference in New Issue
Block a user