Generalize model cache reusing (#21492)

* Generalize model cache reusing

* Use .as<ov::AnyMap>() to convert std::string to ov::AnyMap

* Store driver version for different devices

* Use driver version as gpu compiled model runtime properties
This commit is contained in:
River Li 2023-12-19 23:50:05 +08:00 committed by GitHub
parent 294cc6668c
commit 7b1074b275
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 232 additions and 12 deletions

View File

@ -67,6 +67,26 @@ static constexpr Property<ov::threading::IStreamsExecutor::ThreadBindingType, Pr
*/
static constexpr Property<size_t, PropertyMutability::RW> threads_per_stream{"THREADS_PER_STREAM"};
/**
* @brief Read-only property holding the runtime information of a compiled model. The plugin runtime uses it to check
* whether it is compatible with a cached compiled model. The value is obtained by calling get_property().
*
* The content is defined by the plugin itself; each plugin may require different runtime information.
* For example, the CPU plugin stores the OV version, while the GPU plugin stores the OV version and the GPU driver
* version. Core does not interpret the content: it only reads it from the plugin and writes it into the blob header.
*
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<std::string, PropertyMutability::RO> compiled_model_runtime_properties{
"COMPILED_MODEL_RUNTIME_PROPERTIES"};
/**
* @brief Checks whether a compiled_model_runtime_properties value is supported by this device runtime.
*
* The value to check is passed in the get_property() options under the compiled_model_runtime_properties name;
* the result is returned as a bool.
*
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<bool, PropertyMutability::RO> compiled_model_runtime_properties_supported{
"COMPILED_MODEL_RUNTIME_PROPERTIES_SUPPORTED"};
} // namespace internal
OPENVINO_DEPRECATED(
"This property is deprecated and will be removed soon. Use ov::internal::caching_properties instead of it.")

View File

@ -190,9 +190,12 @@ std::string ModelCache::compute_hash(const std::string& modelStr,
// Default constructor: leaves every header string empty.
CompiledBlobHeader::CompiledBlobHeader() = default;
CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo)
CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion,
const std::string& fileInfo,
const std::string& runtimeInfo)
: m_ieVersion(ieVersion),
m_fileInfo(fileInfo) {}
m_fileInfo(fileInfo),
m_runtimeInfo(runtimeInfo) {}
std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) {
std::string xmlStr;
@ -208,6 +211,7 @@ std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) {
pugi::xml_node compiledBlobNode = document.document_element();
header.m_ieVersion = pugixml::utils::GetStrAttr(compiledBlobNode, "ie_version");
header.m_fileInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "file_info");
header.m_runtimeInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "runtime_info");
return stream;
}
@ -217,6 +221,7 @@ std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header)
auto compiledBlobNode = document.append_child("compiled_blob");
compiledBlobNode.append_attribute("ie_version").set_value(header.m_ieVersion.c_str());
compiledBlobNode.append_attribute("file_info").set_value(header.m_fileInfo.c_str());
compiledBlobNode.append_attribute("runtime_info").set_value(header.m_runtimeInfo.c_str());
document.save(stream, nullptr, pugi::format_raw);
document.reset();

View File

@ -31,10 +31,11 @@ struct ModelCache final {
class CompiledBlobHeader final {
std::string m_ieVersion;
std::string m_fileInfo;
std::string m_runtimeInfo;
public:
CompiledBlobHeader();
CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo);
CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, const std::string& runtimeInfo);
const std::string& getIeVersion() const {
return m_ieVersion;
@ -44,6 +45,10 @@ public:
return m_fileInfo;
}
// Returns the runtime information string held by this header (m_runtimeInfo).
const std::string& getRuntimeInfo() const {
return m_runtimeInfo;
}
friend std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header);
friend std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header);

View File

@ -1436,9 +1436,15 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model_and_cache(const std::s
try {
// need to export network for further import from "cache"
OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::compile_model::Export");
std::string compiled_model_runtime_properties;
if (device_supports_internal_property(plugin, ov::internal::compiled_model_runtime_properties.name())) {
compiled_model_runtime_properties =
plugin.get_property(ov::internal::compiled_model_runtime_properties.name(), {}).as<std::string>();
}
cacheContent.cacheManager->write_cache_entry(cacheContent.blobId, [&](std::ostream& networkStream) {
networkStream << ov::CompiledBlobHeader(InferenceEngine::GetInferenceEngineVersion()->buildNumber,
ov::ModelCache::calculate_file_info(cacheContent.modelPath));
ov::ModelCache::calculate_file_info(cacheContent.modelPath),
compiled_model_runtime_properties);
execNetwork->export_model(networkStream);
});
} catch (...) {
@ -1467,14 +1473,25 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
try {
ov::CompiledBlobHeader header;
networkStream >> header;
if (header.getIeVersion() != ov::get_openvino_version().buildNumber) {
// Build number mismatch, don't use this cache
OPENVINO_THROW("Version does not match");
}
if (header.getFileInfo() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) {
// Original file is changed, don't use cache
OPENVINO_THROW("Original model file is changed");
}
if (util::contains(plugin.get_property(ov::internal::supported_properties),
ov::internal::compiled_model_runtime_properties_supported.name())) {
ov::AnyMap compiled_model_runtime_properties = {
{ov::internal::compiled_model_runtime_properties.name(), std::string(header.getRuntimeInfo())}};
auto res = plugin.get_property(ov::internal::compiled_model_runtime_properties_supported.name(),
compiled_model_runtime_properties);
if (!res.as<bool>()) {
OPENVINO_THROW("Original model runtime properties have been changed, not supported anymore!");
}
} else {
if (header.getIeVersion() != ov::get_openvino_version().buildNumber) {
// Build number mismatch, don't use this cache
OPENVINO_THROW("Version does not match");
}
}
} catch (...) {
throw HeaderException();
}

View File

@ -1703,6 +1703,99 @@ TEST_P(CachingTest, TestCacheFileOldVersion) {
}
}
// Checks how the cache reacts when the compiled_model_runtime_properties string stored in a cached blob
// no longer matches the one reported by the plugin:
//   Step 1 - load once: a compile and an export are expected, no import;
//   then the stored string is overwritten inside every *.blob file;
//   Step 2 - load again: a compile and an export are expected again, no import;
//   Step 3 - load again: only an import is expected.
TEST_P(CachingTest, TestCacheFileWithCompiledModelRuntimeProperties) {
// Properties that may be queried any number of times during the test.
EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::device::capability::EXPORT_IMPORT, _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::device::architecture.name(), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::device::capabilities.name(), _)).Times(AnyNumber());
// Advertise both runtime-properties internal properties as supported.
// NOTE: this is the second expectation for internal::supported_properties; gmock matches the most recently
// declared expectation first, so this one provides the returned list.
EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _))
.Times(AnyNumber())
.WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap&) {
return std::vector<ov::PropertyName>{ov::internal::caching_properties.name(),
ov::internal::compiled_model_runtime_properties.name(),
ov::internal::compiled_model_runtime_properties_supported.name()};
}));
// Fixed string the mock plugin reports as its runtime properties; must be queried at least once.
const std::string compiled_model_runtime_properties("Mock compiled model format segment.");
EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_runtime_properties.name(), _))
.Times(AtLeast(1))
.WillRepeatedly(Return(compiled_model_runtime_properties));
// The support check returns true only when the options carry exactly the string above.
EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_runtime_properties_supported.name(), _))
.Times(AtLeast(1))
.WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap& options) {
auto it = options.find(ov::internal::compiled_model_runtime_properties.name());
ov::Any ret = true;
if (it == options.end() || it->second.as<std::string>() != compiled_model_runtime_properties)
ret = false;
return ret;
}));
{ // Step 1: first load, expect one compile (remote-context or plain overload), no import, one export
EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
.Times(!m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0);
EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0);
m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
EXPECT_CALL(net, export_model(_)).Times(1);
});
testLoad([&](ov::Core& core) {
EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
EXPECT_NO_THROW(m_testFunction(core));
});
}
{
// Rewrite every cached blob: replace the stored runtime-properties string with the same number of '0'
// characters, so the file size is unchanged but the content no longer matches.
auto blobs = ov::test::utils::listFilesWithExt(m_cacheDir, "blob");
for (const auto& fileName : blobs) {
std::string content;
{
// Read the whole blob file into memory.
std::ifstream inp(fileName, std::ios_base::binary);
std::ostringstream ostr;
ostr << inp.rdbuf();
content = ostr.str();
}
auto index = content.find(compiled_model_runtime_properties.c_str());
std::string new_compiled_model_runtime_properties(compiled_model_runtime_properties.size(), '0');
if (index != std::string::npos) {
content.replace(index, compiled_model_runtime_properties.size(), new_compiled_model_runtime_properties);
} else {
// The string was not found in this blob: leave the test body here, Steps 2 and 3 are not run.
return; // skip test
}
// Write the modified content back over the original blob file.
std::ofstream out(fileName, std::ios_base::binary);
out.write(content.c_str(), static_cast<std::streamsize>(content.size()));
}
}
// Remove the export callback registered in Step 1.
m_post_mock_net_callbacks.pop_back();
{ // Step 2. compiled_model_runtime_properties mismatch, cache will be silently removed
EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
.Times(!m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0);
EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0);
m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
EXPECT_CALL(net, export_model(_)).Times(1);
});
testLoad([&](ov::Core& core) {
EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
EXPECT_NO_THROW(m_testFunction(core));
});
}
// Remove the export callback registered in Step 2.
m_post_mock_net_callbacks.pop_back();
{ // Step 3: same load, should be ok now due to re-creation of cache
EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(!m_remoteContext ? 1 : 0);
// No export is expected on any of the models in comp_models.
for (auto& net : comp_models) {
EXPECT_CALL(*net, export_model(_)).Times(0);
}
testLoad([&](ov::Core& core) {
EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
EXPECT_NO_THROW(m_testFunction(core));
});
}
}
TEST_P(CachingTest, LoadHetero_NoCacheMetric) {
EXPECT_CALL(*mockPlugin, query_model(_, _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _))

View File

@ -175,6 +175,8 @@ Engine::Engine() :
#if defined(OV_CPU_WITH_ACL)
scheduler_guard = SchedulerGuard::instance();
#endif
auto& ov_version = ov::get_openvino_version();
m_compiled_model_runtime_properties["OV_VERSION"] = std::string(ov_version.buildNumber);
}
Engine::~Engine() {
@ -690,6 +692,26 @@ ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options)
return decltype(ov::hint::num_requests)::value_type(engConfig.hintNumRequests);
} else if (name == ov::hint::execution_mode) {
return engConfig.executionMode;
} else if (name == ov::internal::compiled_model_runtime_properties.name()) {
auto model_runtime_properties = ov::Any(m_compiled_model_runtime_properties);
return decltype(ov::internal::compiled_model_runtime_properties)::value_type(
std::move(model_runtime_properties.as<std::string>()));
} else if (name == ov::internal::compiled_model_runtime_properties_supported.name()) {
ov::Any res = true;
auto it = options.find(ov::internal::compiled_model_runtime_properties.name());
if (it == options.end()) {
res = false;
} else {
ov::AnyMap input_map = it->second.as<ov::AnyMap>();
for (auto& item : m_compiled_model_runtime_properties) {
auto it = input_map.find(item.first);
if (it == input_map.end() || it->second.as<std::string>() != item.second.as<std::string>()) {
res = false;
break;
}
}
}
return res;
}
return get_ro_property(name, options);
}
@ -740,7 +762,9 @@ ov::Any Engine::get_metric_legacy(const std::string& name, const ov::AnyMap& opt
} else if (ov::internal::supported_properties.name() == name) {
return decltype(ov::internal::supported_properties)::value_type{
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}};
} else if (name == ov::internal::caching_properties) {
std::vector<ov::PropertyName> cachingProperties = {ov::device::full_name.name()};
return decltype(ov::internal::caching_properties)::value_type(std::move(cachingProperties));
@ -798,7 +822,9 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio
} else if (ov::internal::supported_properties == name) {
return decltype(ov::internal::supported_properties)::value_type{
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}};
} else if (name == ov::device::full_name) {
return decltype(ov::device::full_name)::value_type(deviceFullName);
} else if (name == ov::available_devices) {

View File

@ -67,6 +67,7 @@ private:
So track if streams is set explicitly (not auto-configured) */
bool streamsExplicitlySetForEngine = false;
const std::string deviceFullName;
ov::AnyMap m_compiled_model_runtime_properties;
std::shared_ptr<void> specialSetup;

View File

@ -19,6 +19,7 @@ private:
std::string m_default_device_id = "0";
std::map<std::string, cldnn::device::ptr> m_device_map;
std::map<std::string, ExecutionConfig> m_configs_map;
ov::AnyMap m_compiled_model_runtime_properties;
mutable std::map<std::string, std::shared_ptr<RemoteContextImpl>> m_default_contexts;
mutable std::once_flag m_default_contexts_once;

View File

@ -174,6 +174,10 @@ Plugin::Plugin() {
for (const auto& device : m_device_map) {
m_configs_map.insert({device.first, ExecutionConfig(ov::device::id(device.first))});
}
// Set common info for compiled_model_runtime_properties
auto& ov_version = ov::get_openvino_version();
m_compiled_model_runtime_properties["OV_VERSION"] = ov_version.buildNumber;
}
std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<const ov::Model>& model, const ov::AnyMap& orig_config) const {
@ -336,6 +340,53 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options)
return decltype(ov::internal::caching_properties)::value_type(get_caching_properties());
}
// Device-specific runtime info; stays empty until prepare_actual_runtime_info() is invoked.
ov::AnyMap actual_runtime_info;
// Fills actual_runtime_info with the driver version of the default device.
auto prepare_actual_runtime_info = [&]() {
    // Suppose all devices share the same version driver.
    const auto& default_id = m_default_device_id;
    const auto device_entry = m_device_map.find(default_id);
    OPENVINO_ASSERT(device_entry != m_device_map.end(),
                    "[GPU] compiled_model_runtime_properties: Couldn't find device for GPU with id ",
                    default_id);
    actual_runtime_info["DRIVER_VERSION"] = device_entry->second->get_info().driver_version;
    // More items can be inserted if needed
};
// Below properties depend on the device ID.
if (name == ov::internal::compiled_model_runtime_properties.name()) {
prepare_actual_runtime_info();
auto model_runtime_info = m_compiled_model_runtime_properties;
// Set specified device info for compiled_model_runtime_properties
model_runtime_info.insert(actual_runtime_info.begin(), actual_runtime_info.end());
auto model_format = ov::Any(model_runtime_info);
return decltype(ov::internal::compiled_model_runtime_properties)::value_type(
std::move(model_format.as<std::string>()));
} else if (name == ov::internal::compiled_model_runtime_properties_supported.name()) {
ov::Any res = true;
prepare_actual_runtime_info();
auto it = options.find(ov::internal::compiled_model_runtime_properties.name());
if (it == options.end()) {
res = false;
return res;
}
ov::AnyMap input_map = it->second.as<ov::AnyMap>();
// Check common info of compiled_model_runtime_properties
for (auto& item : m_compiled_model_runtime_properties) {
auto it = input_map.find(item.first);
if (it == input_map.end() || it->second.as<std::string>() != item.second.as<std::string>()) {
res = false;
return res;
}
}
// Check specified device info of compiled_model_runtime_properties
for (const auto& it : actual_runtime_info) {
auto item = input_map.find(it.first);
if (item == input_map.end() || item->second.as<std::string>() != it.second.as<std::string>()) {
res = false;
break;
}
}
return res;
}
OPENVINO_SUPPRESS_DEPRECATED_START
if (name == METRIC_KEY(SUPPORTED_METRICS)) {
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, LegacyAPIHelper::get_supported_metrics());
@ -520,7 +571,6 @@ std::vector<ov::PropertyName> Plugin::get_caching_properties() const {
static const std::vector<ov::PropertyName> caching_properties = {
ov::PropertyName{ov::device::architecture.name(), PropertyMutability::RO},
ov::PropertyName{ov::intel_gpu::execution_units_count.name(), PropertyMutability::RO},
ov::PropertyName{ov::intel_gpu::driver_version.name(), PropertyMutability::RO},
ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::execution_mode.name(), PropertyMutability::RW},
};
@ -576,7 +626,9 @@ std::vector<ov::PropertyName> Plugin::get_supported_internal_properties() const
static const std::vector<ov::PropertyName> supported_internal_properties = {
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::config_device_id.name(), ov::PropertyMutability::WO},
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}};
return supported_internal_properties;
}