From 7b1074b275ae4231e682f17cac1c0b5d80eefa71 Mon Sep 17 00:00:00 2001 From: River Li Date: Tue, 19 Dec 2023 23:50:05 +0800 Subject: [PATCH] Generalize model cache reusing (#21492) * Generalize model cache reusing * Use .as() to convert std::string to std::AnyMap * Store driver version for different devices * Use driver version as gpu compiled model runtime properties --- .../openvino/runtime/internal_properties.hpp | 20 ++++ src/inference/src/compilation_context.cpp | 9 +- src/inference/src/compilation_context.hpp | 7 +- src/inference/src/dev/core_impl.cpp | 27 +++++- .../tests/functional/caching_test.cpp | 93 +++++++++++++++++++ src/plugins/intel_cpu/src/plugin.cpp | 30 +++++- src/plugins/intel_cpu/src/plugin.h | 1 + .../include/intel_gpu/plugin/plugin.hpp | 1 + src/plugins/intel_gpu/src/plugin/plugin.cpp | 56 ++++++++++- 9 files changed, 232 insertions(+), 12 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/internal_properties.hpp b/src/inference/dev_api/openvino/runtime/internal_properties.hpp index 47feb69322c..dece41b82d2 100644 --- a/src/inference/dev_api/openvino/runtime/internal_properties.hpp +++ b/src/inference/dev_api/openvino/runtime/internal_properties.hpp @@ -67,6 +67,26 @@ static constexpr Property threads_per_stream{"THREADS_PER_STREAM"}; +/** + * @brief Contains compiled_model_runtime_properties information so that the plugin runtime can check whether it is + * compatible with the cached compiled model; the check result is returned by a get_property() call. + * + * The information details are defined by the plugin itself; each plugin may require different runtime contents. + * For example, CPU plugin will contain OV version, while GPU plugin will contain OV and GPU driver version, etc. + * Core doesn't interpret its content; it only reads it from the plugin and writes it into the blob header. 
+ * + * @ingroup ov_dev_api_plugin_api + */ +static constexpr Property compiled_model_runtime_properties{ + "COMPILED_MODEL_RUNTIME_PROPERTIES"}; + +/** + * @brief Check whether the attached compiled_model_runtime_properties is supported by this device runtime. + * @ingroup ov_dev_api_plugin_api + */ +static constexpr Property compiled_model_runtime_properties_supported{ + "COMPILED_MODEL_RUNTIME_PROPERTIES_SUPPORTED"}; + } // namespace internal OPENVINO_DEPRECATED( "This property is deprecated and will be removed soon. Use ov::internal::caching_properties instead of it.") diff --git a/src/inference/src/compilation_context.cpp b/src/inference/src/compilation_context.cpp index c71b83c6df9..5c9b789b883 100644 --- a/src/inference/src/compilation_context.cpp +++ b/src/inference/src/compilation_context.cpp @@ -190,9 +190,12 @@ std::string ModelCache::compute_hash(const std::string& modelStr, CompiledBlobHeader::CompiledBlobHeader() {} -CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo) +CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, + const std::string& fileInfo, + const std::string& runtimeInfo) : m_ieVersion(ieVersion), - m_fileInfo(fileInfo) {} + m_fileInfo(fileInfo), + m_runtimeInfo(runtimeInfo) {} std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) { std::string xmlStr; @@ -208,6 +211,7 @@ std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) { pugi::xml_node compiledBlobNode = document.document_element(); header.m_ieVersion = pugixml::utils::GetStrAttr(compiledBlobNode, "ie_version"); header.m_fileInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "file_info"); + header.m_runtimeInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "runtime_info"); return stream; } @@ -217,6 +221,7 @@ std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header) auto compiledBlobNode = document.append_child("compiled_blob"); 
compiledBlobNode.append_attribute("ie_version").set_value(header.m_ieVersion.c_str()); compiledBlobNode.append_attribute("file_info").set_value(header.m_fileInfo.c_str()); + compiledBlobNode.append_attribute("runtime_info").set_value(header.m_runtimeInfo.c_str()); document.save(stream, nullptr, pugi::format_raw); document.reset(); diff --git a/src/inference/src/compilation_context.hpp b/src/inference/src/compilation_context.hpp index cfaacba242c..9b5575d056d 100644 --- a/src/inference/src/compilation_context.hpp +++ b/src/inference/src/compilation_context.hpp @@ -31,10 +31,11 @@ struct ModelCache final { class CompiledBlobHeader final { std::string m_ieVersion; std::string m_fileInfo; + std::string m_runtimeInfo; public: CompiledBlobHeader(); - CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo); + CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, const std::string& runtimeInfo); const std::string& getIeVersion() const { return m_ieVersion; @@ -44,6 +45,10 @@ public: return m_fileInfo; } + const std::string& getRuntimeInfo() const { + return m_runtimeInfo; + } + friend std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header); friend std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header); diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 6882c3a7035..66627f58e63 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -1436,9 +1436,15 @@ ov::SoPtr ov::CoreImpl::compile_model_and_cache(const std::s try { // need to export network for further import from "cache" OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::compile_model::Export"); + std::string compiled_model_runtime_properties; + if (device_supports_internal_property(plugin, ov::internal::compiled_model_runtime_properties.name())) { + compiled_model_runtime_properties = + 
plugin.get_property(ov::internal::compiled_model_runtime_properties.name(), {}).as(); + } cacheContent.cacheManager->write_cache_entry(cacheContent.blobId, [&](std::ostream& networkStream) { networkStream << ov::CompiledBlobHeader(InferenceEngine::GetInferenceEngineVersion()->buildNumber, - ov::ModelCache::calculate_file_info(cacheContent.modelPath)); + ov::ModelCache::calculate_file_info(cacheContent.modelPath), + compiled_model_runtime_properties); execNetwork->export_model(networkStream); }); } catch (...) { @@ -1467,14 +1473,25 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( try { ov::CompiledBlobHeader header; networkStream >> header; - if (header.getIeVersion() != ov::get_openvino_version().buildNumber) { - // Build number mismatch, don't use this cache - OPENVINO_THROW("Version does not match"); - } if (header.getFileInfo() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) { // Original file is changed, don't use cache OPENVINO_THROW("Original model file is changed"); } + if (util::contains(plugin.get_property(ov::internal::supported_properties), + ov::internal::compiled_model_runtime_properties_supported.name())) { + ov::AnyMap compiled_model_runtime_properties = { + {ov::internal::compiled_model_runtime_properties.name(), std::string(header.getRuntimeInfo())}}; + auto res = plugin.get_property(ov::internal::compiled_model_runtime_properties_supported.name(), + compiled_model_runtime_properties); + if (!res.as()) { + OPENVINO_THROW("Original model runtime properties have been changed, not supported anymore!"); + } + } else { + if (header.getIeVersion() != ov::get_openvino_version().buildNumber) { + // Build number mismatch, don't use this cache + OPENVINO_THROW("Version does not match"); + } + } } catch (...) 
{ throw HeaderException(); } diff --git a/src/inference/tests/functional/caching_test.cpp b/src/inference/tests/functional/caching_test.cpp index 8991e2c7b41..a8f10175218 100644 --- a/src/inference/tests/functional/caching_test.cpp +++ b/src/inference/tests/functional/caching_test.cpp @@ -1703,6 +1703,99 @@ TEST_P(CachingTest, TestCacheFileOldVersion) { } } +TEST_P(CachingTest, TestCacheFileWithCompiledModelRuntimeProperties) { + EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::device::capability::EXPORT_IMPORT, _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::device::architecture.name(), _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::device::capabilities.name(), _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _)) + .Times(AnyNumber()) + .WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap&) { + return std::vector{ov::internal::caching_properties.name(), + ov::internal::compiled_model_runtime_properties.name(), + ov::internal::compiled_model_runtime_properties_supported.name()}; + })); + const std::string compiled_model_runtime_properties("Mock compiled model format segment."); + EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_runtime_properties.name(), _)) + .Times(AtLeast(1)) + .WillRepeatedly(Return(compiled_model_runtime_properties)); + EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_runtime_properties_supported.name(), _)) + .Times(AtLeast(1)) + .WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap& options) { + auto it = options.find(ov::internal::compiled_model_runtime_properties.name()); + ov::Any ret = 
true; + if (it == options.end() || it->second.as() != compiled_model_runtime_properties) + ret = false; + return ret; + })); + { + EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)) + .Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0); + m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) { + EXPECT_CALL(net, export_model(_)).Times(1); + }); + testLoad([&](ov::Core& core) { + EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir))); + EXPECT_NO_THROW(m_testFunction(core)); + }); + } + { + auto blobs = ov::test::utils::listFilesWithExt(m_cacheDir, "blob"); + for (const auto& fileName : blobs) { + std::string content; + { + std::ifstream inp(fileName, std::ios_base::binary); + std::ostringstream ostr; + ostr << inp.rdbuf(); + content = ostr.str(); + } + auto index = content.find(compiled_model_runtime_properties.c_str()); + std::string new_compiled_model_runtime_properties(compiled_model_runtime_properties.size(), '0'); + if (index != std::string::npos) { + content.replace(index, compiled_model_runtime_properties.size(), new_compiled_model_runtime_properties); + } else { + return; // skip test + } + std::ofstream out(fileName, std::ios_base::binary); + out.write(content.c_str(), static_cast(content.size())); + } + } + m_post_mock_net_callbacks.pop_back(); + { // Step 2. compiled_model_runtime_properties mismatch, cache will be silently removed + EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)) + .Times(!m_remoteContext ? 
1 : 0); + EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0); + m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) { + EXPECT_CALL(net, export_model(_)).Times(1); + }); + testLoad([&](ov::Core& core) { + EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir))); + EXPECT_NO_THROW(m_testFunction(core)); + }); + } + m_post_mock_net_callbacks.pop_back(); + { // Step 3: same load, should be ok now due to re-creation of cache + EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(!m_remoteContext ? 1 : 0); + for (auto& net : comp_models) { + EXPECT_CALL(*net, export_model(_)).Times(0); + } + testLoad([&](ov::Core& core) { + EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir))); + EXPECT_NO_THROW(m_testFunction(core)); + }); + } +} + TEST_P(CachingTest, LoadHetero_NoCacheMetric) { EXPECT_CALL(*mockPlugin, query_model(_, _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _)) diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 779b6ef9fa2..29314fdd054 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -175,6 +175,8 @@ Engine::Engine() : #if defined(OV_CPU_WITH_ACL) scheduler_guard = SchedulerGuard::instance(); #endif + auto& ov_version = ov::get_openvino_version(); + m_compiled_model_runtime_properties["OV_VERSION"] = std::string(ov_version.buildNumber); } Engine::~Engine() { @@ -690,6 +692,26 @@ ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options) return decltype(ov::hint::num_requests)::value_type(engConfig.hintNumRequests); } else if (name == ov::hint::execution_mode) { return engConfig.executionMode; + } else if (name 
== ov::internal::compiled_model_runtime_properties.name()) { + auto model_runtime_properties = ov::Any(m_compiled_model_runtime_properties); + return decltype(ov::internal::compiled_model_runtime_properties)::value_type( + std::move(model_runtime_properties.as())); + } else if (name == ov::internal::compiled_model_runtime_properties_supported.name()) { + ov::Any res = true; + auto it = options.find(ov::internal::compiled_model_runtime_properties.name()); + if (it == options.end()) { + res = false; + } else { + ov::AnyMap input_map = it->second.as(); + for (auto& item : m_compiled_model_runtime_properties) { + auto it = input_map.find(item.first); + if (it == input_map.end() || it->second.as() != item.second.as()) { + res = false; + break; + } + } + } + return res; } return get_ro_property(name, options); } @@ -740,7 +762,9 @@ ov::Any Engine::get_metric_legacy(const std::string& name, const ov::AnyMap& opt } else if (ov::internal::supported_properties.name() == name) { return decltype(ov::internal::supported_properties)::value_type{ ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}}; + ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}}; } else if (name == ov::internal::caching_properties) { std::vector cachingProperties = {ov::device::full_name.name()}; return decltype(ov::internal::caching_properties)::value_type(std::move(cachingProperties)); @@ -798,7 +822,9 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio } else if (ov::internal::supported_properties == name) { return decltype(ov::internal::supported_properties)::value_type{ 
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}}; + ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}}; } else if (name == ov::device::full_name) { return decltype(ov::device::full_name)::value_type(deviceFullName); } else if (name == ov::available_devices) { diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index 7f04a3c3708..256eafdbadb 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -67,6 +67,7 @@ private: So track if streams is set explicitly (not auto-configured) */ bool streamsExplicitlySetForEngine = false; const std::string deviceFullName; + ov::AnyMap m_compiled_model_runtime_properties; std::shared_ptr specialSetup; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index d92cd695f4b..4378e12c921 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -19,6 +19,7 @@ private: std::string m_default_device_id = "0"; std::map m_device_map; std::map m_configs_map; + ov::AnyMap m_compiled_model_runtime_properties; mutable std::map> m_default_contexts; mutable std::once_flag m_default_contexts_once; diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 767cb72e16a..b54fbc28702 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -174,6 +174,10 @@ Plugin::Plugin() { for (const auto& device : m_device_map) { 
m_configs_map.insert({device.first, ExecutionConfig(ov::device::id(device.first))}); } + + // Set common info for compiled_model_runtime_properties + auto& ov_version = ov::get_openvino_version(); + m_compiled_model_runtime_properties["OV_VERSION"] = ov_version.buildNumber; } std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, const ov::AnyMap& orig_config) const { @@ -336,6 +340,53 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) return decltype(ov::internal::caching_properties)::value_type(get_caching_properties()); } + ov::AnyMap actual_runtime_info; + auto prepare_actual_runtime_info = [&]() { + // Suppose all devices share the same version driver. + auto device_id = m_default_device_id; + OPENVINO_ASSERT(m_device_map.find(device_id) != m_device_map.end(), + "[GPU] compiled_model_runtime_properties: Couldn't find device for GPU with id ", + device_id); + actual_runtime_info["DRIVER_VERSION"] = m_device_map.at(device_id)->get_info().driver_version; + // More items can be inserted if needed + }; + // Below properties depend on the device ID. 
+ if (name == ov::internal::compiled_model_runtime_properties.name()) { + prepare_actual_runtime_info(); + auto model_runtime_info = m_compiled_model_runtime_properties; + // Set specified device info for compiled_model_runtime_properties + model_runtime_info.insert(actual_runtime_info.begin(), actual_runtime_info.end()); + auto model_format = ov::Any(model_runtime_info); + return decltype(ov::internal::compiled_model_runtime_properties)::value_type( + std::move(model_format.as())); + } else if (name == ov::internal::compiled_model_runtime_properties_supported.name()) { + ov::Any res = true; + prepare_actual_runtime_info(); + auto it = options.find(ov::internal::compiled_model_runtime_properties.name()); + if (it == options.end()) { + res = false; + return res; + } + ov::AnyMap input_map = it->second.as(); + // Check common info of compiled_model_runtime_properties + for (auto& item : m_compiled_model_runtime_properties) { + auto it = input_map.find(item.first); + if (it == input_map.end() || it->second.as() != item.second.as()) { + res = false; + return res; + } + } + // Check specified device info of compiled_model_runtime_properties + for (const auto& it : actual_runtime_info) { + auto item = input_map.find(it.first); + if (item == input_map.end() || item->second.as() != it.second.as()) { + res = false; + break; + } + } + return res; + } + OPENVINO_SUPPRESS_DEPRECATED_START if (name == METRIC_KEY(SUPPORTED_METRICS)) { IE_SET_METRIC_RETURN(SUPPORTED_METRICS, LegacyAPIHelper::get_supported_metrics()); @@ -520,7 +571,6 @@ std::vector Plugin::get_caching_properties() const { static const std::vector caching_properties = { ov::PropertyName{ov::device::architecture.name(), PropertyMutability::RO}, ov::PropertyName{ov::intel_gpu::execution_units_count.name(), PropertyMutability::RO}, - ov::PropertyName{ov::intel_gpu::driver_version.name(), PropertyMutability::RO}, ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW}, 
ov::PropertyName{ov::hint::execution_mode.name(), PropertyMutability::RW}, }; @@ -576,7 +626,9 @@ std::vector Plugin::get_supported_internal_properties() const static const std::vector supported_internal_properties = { ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, ov::PropertyName{ov::internal::config_device_id.name(), ov::PropertyMutability::WO}, - ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}}; + ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}}; return supported_internal_properties; }