[CPU] ModelCaching: added plugin specific config properties serialization (#13593)

This commit is contained in:
Vladislav Golubev
2022-11-11 18:27:22 +01:00
committed by GitHub
parent cb067de597
commit cec772f2c0
9 changed files with 193 additions and 35 deletions

View File

@@ -107,3 +107,14 @@ DECLARE_CONFIG_KEY(CONFIG_DEVICE_ID);
} // namespace PluginConfigInternalParams
} // namespace InferenceEngine
namespace ov {
/**
* @brief Read-only property to get a std::vector<PropertyName> of properties
* which should affect the hash calculation for model cache
* @ingroup ie_dev_api_plugin_api
*/
static constexpr Property<std::vector<PropertyName>, PropertyMutability::RO> caching_properties{"CACHING_PROPERTIES"};
} // namespace ov

View File

@@ -490,26 +490,24 @@ class CoreImpl : public ie::ICore, public std::enable_shared_from_this<ie::ICore
const std::string& deviceFamily,
const std::map<std::string, std::string>& origConfig) const {
std::map<std::string, Any> getMetricConfig;
auto compileConfig = origConfig;
std::map<std::string, std::string> compileConfig;
// 0. Remove TARGET_FALLBACK key, move it to getMetricConfig
auto targetFallbackIt = compileConfig.find("TARGET_FALLBACK");
if (targetFallbackIt == compileConfig.end()) {
targetFallbackIt = compileConfig.find(ov::device::priorities.name());
// 0. Move TARGET_FALLBACK key to getMetricConfig
auto targetFallbackIt = origConfig.find("TARGET_FALLBACK");
if (targetFallbackIt == origConfig.end()) {
targetFallbackIt = origConfig.find(ov::device::priorities.name());
}
if (targetFallbackIt != compileConfig.end()) {
if (targetFallbackIt != origConfig.end()) {
getMetricConfig[targetFallbackIt->first] = targetFallbackIt->second;
compileConfig.erase(targetFallbackIt);
}
// 1. remove DEVICE_ID key
auto deviceIt = compileConfig.find(ov::device::id.name());
if (deviceIt != compileConfig.end()) {
// 1. Move DEVICE_ID key to getMetricConfig
auto deviceIt = origConfig.find(ov::device::id.name());
if (deviceIt != origConfig.end()) {
getMetricConfig[deviceIt->first] = deviceIt->second;
compileConfig.erase(deviceIt);
}
// 2. replace it with DEVICE_ARCHITECTURE value
// 2. Replace it with DEVICE_ARCHITECTURE value
if (DeviceSupportsConfigKey(plugin, ov::device::architecture.name())) {
compileConfig[ov::device::architecture.name()] =
plugin.get_property(ov::device::architecture, getMetricConfig);
@@ -517,6 +515,17 @@ class CoreImpl : public ie::ICore, public std::enable_shared_from_this<ie::ICore
// Take device name if device does not support DEVICE_ARCHITECTURE metric
compileConfig[ov::device::architecture.name()] = deviceFamily;
}
// 3. Extract config keys which affect compile config
if (DeviceSupportsConfigKey(plugin, ov::caching_properties.name())) {
auto cachingProps = plugin.get_property(ov::caching_properties);
for (const auto& prop : cachingProps) {
// origConfig values have higher priority than plugin parameters
auto it = origConfig.find(prop);
compileConfig[prop] =
it == origConfig.end() ? plugin.get_property(prop, {}).as<std::string>() : it->second;
}
}
return compileConfig;
}

View File

@@ -53,6 +53,7 @@ struct Config {
void readProperties(const std::map<std::string, std::string> &config);
void updateProperties();
std::map<std::string, std::string> _config;
#ifdef CPU_DEBUG_CAPS

View File

@@ -703,26 +703,11 @@ static bool streamsSet(const std::map<std::string, std::string>& config) {
}
void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, const std::shared_ptr<ngraph::Function>& ngraphFunc) const {
const bool streamsExplicitlySetForModel = streamsSet(config);
// checking streams (to avoid overriding what user might explicitly set in the incoming config or previously via SetConfig)
if (streamsExplicitlySetForModel ||
streamsExplicitlySetForEngine)
return;
auto getNumStreamsLatency = [&]() {
return std::pair<std::string, std::string>(CONFIG_VALUE(CPU_THROUGHPUT_NUMA), ov::util::to_string(ov::streams::NUMA));
};
const auto& mode = config.find(CONFIG_KEY(PERFORMANCE_HINT));
// the mode may have just arrived to the LoadNetwork, or was set with the plugin's SetConfig
if (mode == config.end() && engConfig.perfHintsConfig.ovPerfHint.empty())
return;
/* performance hints set for network have higher priority than engine ones.
* This applies for all the configuration parameters */
const auto mode_name = (mode != config.end()) ?
PerfHintsConfig::CheckPerformanceHintValue(mode->second) :
engConfig.perfHintsConfig.ovPerfHint;
if (mode_name == CONFIG_VALUE(LATENCY)) {
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = CONFIG_VALUE(CPU_THROUGHPUT_NUMA);
config[ov::num_streams.name()] = ov::util::to_string(ov::streams::NUMA);
} else if (mode_name == CONFIG_VALUE(THROUGHPUT)) {
auto getNumStreamsThroughput = [&]() {
const auto isa = dnnl::get_effective_cpu_isa();
float isaSpecificThreshold = 1.0f;
switch (isa) {
@@ -797,8 +782,48 @@ void Engine::ApplyPerformanceHints(std::map<std::string, std::string> &config, c
num_streams = std::min(num_streams,
engConfig.perfHintsConfig.ovPerfHintNumRequests);
}
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = std::to_string(num_streams);
config[ov::num_streams.name()] = ov::util::to_string(num_streams);
return std::pair<std::string, std::string>(std::to_string(num_streams), ov::util::to_string(num_streams));
};
auto getPerfHintName = [&]() {
const bool streamsExplicitlySetForModel = streamsSet(config);
// checking streams (to avoid overriding what user might explicitly set in the incoming config or previously via SetConfig)
if (streamsExplicitlySetForModel ||
streamsExplicitlySetForEngine)
return std::string();
const auto& perf_hint = config.find(CONFIG_KEY(PERFORMANCE_HINT));
// the perf_hint may have just arrived to the LoadNetwork, or was set with the plugin's SetConfig
if (perf_hint == config.end() && engConfig.perfHintsConfig.ovPerfHint.empty())
return std::string();
/* performance hints set for network have higher priority than engine ones.
* This applies for all the configuration parameters */
const auto perf_hint_name = (perf_hint != config.end()) ?
PerfHintsConfig::CheckPerformanceHintValue(perf_hint->second) :
engConfig.perfHintsConfig.ovPerfHint;
return perf_hint_name;
};
// We compute both hints values because the optimal number of streams are computed based on ov::Model
// while we export model in cpu internal opset so we need to save precomputed optimal # streams for both hint modes
const auto latency_hints = getNumStreamsLatency();
const auto tput_hints = getNumStreamsThroughput();
// save hints parameters to model rt_info
ov::AnyMap hints_props;
const auto latency_name = std::string(CONFIG_VALUE(LATENCY)) + "_" + std::string(ov::num_streams.name());
const auto tput_name = std::string(CONFIG_VALUE(THROUGHPUT)) + "_" + std::string(ov::num_streams.name());
hints_props.insert({latency_name, latency_hints.second});
hints_props.insert({tput_name, tput_hints.second});
ngraphFunc->set_rt_info(hints_props, "intel_cpu_hints_config");
const auto perf_hint_name = getPerfHintName();
if (perf_hint_name == CONFIG_VALUE(LATENCY)) {
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = latency_hints.first;
config[ov::num_streams.name()] = latency_hints.second;
} else if (perf_hint_name == CONFIG_VALUE(THROUGHPUT)) {
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = tput_hints.first;
config[ov::num_streams.name()] = tput_hints.first;
}
}
@@ -1024,6 +1049,7 @@ Parameter Engine::GetMetric(const std::string& name, const std::map<std::string,
RO_property(ov::range_for_streams.name()),
RO_property(ov::device::full_name.name()),
RO_property(ov::device::capabilities.name()),
RO_property(ov::caching_properties.name()),
RO_property(ov::cache_dir.name()) // WA Can be removed after implementing snippet serialization.
};
// the whole config is RW before network is loaded.
@@ -1065,6 +1091,9 @@ Parameter Engine::GetMetric(const std::string& name, const std::map<std::string,
} else if (name == ov::range_for_streams) {
const std::tuple<unsigned int, unsigned int> range = std::make_tuple(1, parallel_get_max_threads());
return decltype(ov::range_for_streams)::value_type(range);
} else if (name == ov::caching_properties) {
std::vector<ov::PropertyName> cachingProperties;
return decltype(ov::caching_properties)::value_type(cachingProperties);
}
/* Internally legacy parameters are used with new API as part of migration procedure.
* This fallback can be removed as soon as migration completed */
@@ -1136,6 +1165,22 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istr
Config conf = engConfig;
conf.readProperties(config);
// import config props from caching model
auto function = cnnnetwork.getFunction();
if (function->has_rt_info("intel_cpu_hints_config") && !conf.perfHintsConfig.ovPerfHint.empty()) {
const auto mode_name = conf.perfHintsConfig.ovPerfHint;
if (mode_name == CONFIG_VALUE(LATENCY) || mode_name == CONFIG_VALUE(THROUGHPUT)) {
const auto& hints_config = function->get_rt_info<ov::AnyMap>("intel_cpu_hints_config");
const auto hints_param_name = mode_name + "_" + std::string(ov::num_streams.name());
const auto it = hints_config.find(hints_param_name);
if (it != hints_config.end()) {
conf.readProperties({{std::string(ov::num_streams.name()), it->second.as<std::string>()}});
} else {
IE_THROW() << "Cache file doesn't contain precalculated number of streams for mode " << mode_name;
}
}
}
if (conf.enableDynamicBatch) {
conf.batchLimit = static_cast<int>(cnnnetwork.getBatchSize());
}

View File

@@ -0,0 +1,65 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/core/any.hpp"
#include "openvino/runtime/core.hpp"
#include "openvino/runtime/compiled_model.hpp"
#include "openvino/runtime/properties.hpp"
#include "common_test_utils/test_common.hpp"
#include "ngraph_functions/builders.hpp"
#include <openvino/opsets/opset9.hpp>
#include <ie/ie_core.hpp>
namespace {
class ExportImportTest : public CommonTestUtils::TestsCommon {};
std::shared_ptr<ov::Model> MakeMatMulModel() {
const ov::Shape input_shape = {1, 4096};
const ov::element::Type precision = ov::element::f32;
auto params = ngraph::builder::makeParams(precision, {input_shape});
auto matmul_const = ngraph::builder::makeConstant(precision, {4096, 1024}, std::vector<float>{}, true);
auto matmul = ngraph::builder::makeMatMul(params[0], matmul_const);
auto add_const = ngraph::builder::makeConstant(precision, {1, 1024}, std::vector<float>{}, true);
auto add = ngraph::builder::makeEltwise(matmul, add_const, ngraph::helpers::EltwiseTypes::ADD);
auto softmax = std::make_shared<ov::opset9::Softmax>(add);
ngraph::NodeVector results{softmax};
return std::make_shared<ov::Model>(results, params, "MatMulModel");
}
// Verifies that the optimal number of streams computed at compile time survives
// an export/import round trip for both THROUGHPUT and LATENCY performance hints.
TEST(ExportImportTest, ExportOptimalNumStreams) {
    const std::string device = "CPU";
    ov::Core core;
    auto model = MakeMatMulModel();

    const auto tput_hint = ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT);
    const auto latency_hint = ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY);

    auto tput_compiled = core.compile_model(model, device, tput_hint);
    auto latency_compiled = core.compile_model(model, device, latency_hint);
    const auto tput_streams_ref = tput_compiled.get_property(ov::num_streams.name()).as<std::string>();
    const auto latency_streams_ref = latency_compiled.get_property(ov::num_streams.name()).as<std::string>();

    // Export once (from the THROUGHPUT-compiled model) and import under each hint.
    std::stringstream blob;
    tput_compiled.export_model(blob);

    auto import_and_query_streams = [&](const auto& hint) {
        std::stringstream ss(blob.str());
        auto imported = core.import_model(ss, device, hint);
        return imported.get_property(ov::num_streams.name()).as<std::string>();
    };

    EXPECT_EQ(tput_streams_ref, import_and_query_streams(tput_hint));
    EXPECT_EQ(latency_streams_ref, import_and_query_streams(latency_hint));
}
} // namespace

View File

@@ -164,6 +164,9 @@ std::vector<std::string> disabledTestPatterns() {
// is shared across plugins
// passed local test and cpu has specific test cases with nms9 to cover
R"(smoke_NmsLayerTest.*)",
// Issue: 95239
// HETERO plugin lacks caching_properties definition
R"(smoke_Hetero_CachingSupportCase.*)",
// 94982. FP32->I32 conversion issue in the reference implementation. There can be some garbage in the rest of float values like 0.333333745.
// The kernel does not have such garbage. The diff 0.000000745 is taken into account in calculations and affects further type conversion.
// Reorder->GridSample->Reorder also does not work here. Potential fix is to use nearest conversion instead of truncation.

View File

@@ -86,6 +86,8 @@ std::vector<std::string> disabledTestPatterns() {
// TODO: Issue: 71068
R"(.*OVInferRequestCancellationTests.*)",
// TODO: Issue: 71070
R"(.*OVInferenceChaining.*(StaticOutputToStaticInput).*)"
R"(.*OVInferenceChaining.*(StaticOutputToStaticInput).*)",
// TODO: Issue: 95234
R"(.*smoke_CachingSupportCase_GNA.*)"
};
}

View File

@@ -35,6 +35,8 @@
#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp"
#include "cpp/ie_plugin.hpp"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
using namespace InferenceEngine;
using namespace ::testing;
using namespace InferenceEngine::details;
@@ -652,10 +654,26 @@ TEST_P(CachingTest, TestChangeLoadConfig) {
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(SUPPORTED_METRICS), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(IMPORT_EXPORT_SUPPORT), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, GetMetric(METRIC_KEY(DEVICE_ARCHITECTURE), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, GetMetric(ov::caching_properties.name(), _)).Times(AnyNumber());
ON_CALL(*mockPlugin, GetMetric(ov::supported_properties.name(), _)).
WillByDefault(Invoke([&](const std::string &, const std::map<std::string, Parameter> &) {
return std::vector<ov::PropertyName>{
ov::supported_properties.name(),
METRIC_KEY(IMPORT_EXPORT_SUPPORT),
ov::device::capabilities.name(),
ov::device::architecture.name(),
ov::caching_properties.name()};
}));
ON_CALL(*mockPlugin, GetMetric(ov::caching_properties.name(), _)).
WillByDefault(Invoke([&](const std::string &, const std::map<std::string, Parameter> &) {
std::vector<ov::PropertyName> res;
res.push_back(ov::PropertyName(CUSTOM_KEY, ov::PropertyMutability::RO));
return decltype(ov::caching_properties)::value_type(res);
}));
ON_CALL(*mockPlugin, GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS), _)).
WillByDefault(Invoke([&](const std::string &, const std::map<std::string, Parameter> &) {
std::vector<std::string> res;
res.push_back(CUSTOM_KEY);
res.push_back(ov::caching_properties.name());
return res;
}));
{

View File

@@ -13,6 +13,7 @@
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/subgraph_builders.hpp"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#define GTEST_COUT std::cout << "[ ] [ INFO ] "
@@ -184,6 +185,9 @@ void CompileModelCacheTestBase::run() {
GTEST_COUT << "Plugin doesn't support import and export - skipping test" << std::endl;
GTEST_SKIP();
}
if (importExportSupported(*core)) {
ASSERT_NO_THROW(core->get_property(targetDevice, ov::caching_properties));
}
configure_model();
try {
compiledModel = core->compile_model(function, targetDevice, configuration);