From 246a287c3459b90d34f589377153c2e32582e8be Mon Sep 17 00:00:00 2001 From: River Li Date: Tue, 10 Jan 2023 16:32:34 +0800 Subject: [PATCH] Add new compile model api to support hash model memory (#14543) * Add new compile_model api for ONNX RUNTIME OV EP Allow compile_model() accept model/weight data. * Update minor place * Cache model if possible * Compute hash based on model_xml and model_weight * Update typo * Change hash key computation for model's weights * Resolve test case issue * Use tensor replace blob for hash computation * Fix hash computation isssue and add more test cases * Fix a build issue caused by data format --- src/inference/dev_api/ie_icore.hpp | 21 +++++ .../include/openvino/runtime/core.hpp | 38 ++++++++ src/inference/src/compilation_context.cpp | 28 ++++++ src/inference/src/compilation_context.hpp | 7 ++ src/inference/src/ie_core.cpp | 63 +++++++++++++ .../behavior/ov_plugin/caching_tests.cpp | 6 ++ .../behavior/ov_plugin/caching_tests.cpp | 11 +++ .../exec_network_base.hpp | 58 ++++++++++++ .../behavior/ov_plugin/caching_tests.hpp | 21 +++++ .../src/behavior/ov_plugin/caching_tests.cpp | 92 +++++++++++++++++++ .../cpp_interfaces/interface/mock_icore.hpp | 7 ++ 11 files changed, 352 insertions(+) diff --git a/src/inference/dev_api/ie_icore.hpp b/src/inference/dev_api/ie_icore.hpp index cc4e3b32205..9b01c266444 100644 --- a/src/inference/dev_api/ie_icore.hpp +++ b/src/inference/dev_api/ie_icore.hpp @@ -99,6 +99,27 @@ public: const std::map& config, const std::function& val = nullptr) = 0; + /** + * @brief Creates an executable network from a model memory. + * + * Users can create as many networks as they need and use + * them simultaneously (up to the limitation of the hardware resources) + * + * @param modelStr String data of model + * @param weights Model's weights + * @param deviceName Name of device to load network to + * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load + * operation + * @param val Optional callback to perform validation of loaded CNNNetwork, if ReadNetwork is triggered + * @return An executable network reference + */ + virtual ie::SoExecutableNetworkInternal LoadNetwork( + const std::string& modelStr, + const ie::Blob::CPtr& weights, + const std::string& deviceName, + const std::map& config, + const std::function& val = nullptr) = 0; + /** * @brief Creates an executable network from a previously exported network * @param networkModel network model stream diff --git a/src/inference/include/openvino/runtime/core.hpp b/src/inference/include/openvino/runtime/core.hpp index b0bef7f4a1d..6af39a36cfb 100644 --- a/src/inference/include/openvino/runtime/core.hpp +++ b/src/inference/include/openvino/runtime/core.hpp @@ -255,6 +255,44 @@ public: return compile_model(model_path, device_name, AnyMap{std::forward(properties)...}); } + /** + * @brief Reads a model and creates a compiled model from the IR/ONNX/PDPD memory. + * @param model String with a model in IR/ONNX/PDPD format. + * @param weights Shared pointer to a constant tensor with weights. + * Reading ONNX/PDPD models does not support loading weights from the @p weights tensors. + * @param device_name Name of a device to load a model to. + * @param properties Optional map of pairs: (property name, property value) relevant only for this load + * operation. + * @note Created model object shares the weights with the @p weights object. 
+     * Thus, do not create @p weights on temporary data that can be freed later, since the model
+     * constant data will point to invalid memory.
+     * @return A compiled model.
+     */
+    CompiledModel compile_model(const std::string& model,
+                                const ov::Tensor& weights,
+                                const std::string& device_name,
+                                const AnyMap& properties = {});
+
+    /**
+     * @brief Reads a model and creates a compiled model from the IR/ONNX/PDPD memory.
+     * @param model String with a model in IR/ONNX/PDPD format.
+     * @param weights Shared pointer to a constant tensor with weights.
+     * Reading ONNX/PDPD models does not support loading weights from the @p weights tensors.
+     * @param device_name Name of a device to load a model to.
+     * @tparam Properties Should be a pack of `std::pair<std::string, ov::Any>` types.
+     * @note Created model object shares the weights with the @p weights object.
+     * Thus, do not create @p weights on temporary data that can be freed later, since the model
+     * constant data will point to invalid memory.
+     * @return A compiled model.
+     */
+    template <typename... Properties>
+    util::EnableIfAllStringAny<CompiledModel, Properties...> compile_model(const std::string& model,
+                                                                           const ov::Tensor& weights,
+                                                                           const std::string& device_name,
+                                                                           Properties&&... properties) {
+        return compile_model(model, weights, device_name, AnyMap{std::forward<Properties>(properties)...});
+    }
+
     /**
      * @brief Creates a compiled model from a source model within a specified remote context.
      * @param model Model object acquired from Core::read_model.
diff --git a/src/inference/src/compilation_context.cpp b/src/inference/src/compilation_context.cpp
index d23a09158b0..27c1a83db65 100644
--- a/src/inference/src/compilation_context.cpp
+++ b/src/inference/src/compilation_context.cpp
@@ -140,6 +140,34 @@ std::string NetworkCompilationContext::computeHash(const std::string& modelName,
     return std::to_string(seed);
 }
 
+std::string NetworkCompilationContext::computeHash(const std::string& modelStr,
+                                                   const ov::Tensor& tensor,
+                                                   const std::map<std::string, std::string>& compileOptions) {
+    OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - Model Memory");
+    uint64_t seed = 0;
+    // model string
+    seed = hash_combine(seed, modelStr);
+
+    // tensor data
+    seed = hash_combine(seed, tensor.get_size());
+
+    auto ptr = static_cast<size_t*>(tensor.data());
+    size_t size = tensor.get_size() / sizeof(size_t);
+    for (size_t i = 0; i < size; i++)
+        seed = hash_combine(seed, ptr[i]);
+    auto size_done = size * sizeof(size_t);
+    auto ptr_left = static_cast<uint8_t*>(tensor.data()) + size_done;
+    size_t size_left = tensor.get_size() - size_done;
+    for (size_t i = 0; i < size_left; i++)
+        seed = hash_combine(seed, ptr_left[i]);
+
+    // compile options
+    for (const auto& kvp : compileOptions) {
+        seed = hash_combine(seed, kvp.first + kvp.second);
+    }
+    return std::to_string(seed);
+}
+
 //////////////////////////////////////////////////
 
 CompiledBlobHeader::CompiledBlobHeader() {}
diff --git a/src/inference/src/compilation_context.hpp b/src/inference/src/compilation_context.hpp
index fab53cc22d2..7b61a014948 100644
--- a/src/inference/src/compilation_context.hpp
+++ b/src/inference/src/compilation_context.hpp
@@ -9,6 +9,10 @@
 #include
 #include
 
+namespace ov {
+class Tensor;
+}  // namespace ov
+
 namespace InferenceEngine {
 
 class CNNNetwork;
@@ -20,6 +24,9 @@ struct NetworkCompilationContext final {
     static std::string computeHash(const std::string& modelName,
                                    const std::map<std::string, std::string>& compileOptions);
 
+    static std::string computeHash(const std::string& modelStr,
+                                   const ov::Tensor& data,
+                                   const std::map<std::string, std::string>& compileOptions);
 };
 
 class CompiledBlobHeader final {
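The new NetworkCompilationContext::computeHash overload above folds the model text, the raw weight bytes, and the compile options into a single seed that serves as the cache key. As a rough illustration, the same idea can be sketched in isolation as follows; this is a simplified stand-in, assuming a Boost-style hash_combine and hashing the weights byte by byte rather than in size_t-sized chunks as the patch does, and it is not the actual NetworkCompilationContext implementation.

#include <cstdint>
#include <functional>
#include <map>
#include <string>
#include <vector>

// Boost-style combiner; the real hash_combine used by the patch may differ.
static uint64_t hash_combine(uint64_t seed, uint64_t value) {
    return seed ^ (value + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2));
}

std::string compute_memory_hash(const std::string& model_str,
                                const std::vector<uint8_t>& weights,
                                const std::map<std::string, std::string>& compile_options) {
    uint64_t seed = 0;
    seed = hash_combine(seed, std::hash<std::string>{}(model_str));  // model text
    seed = hash_combine(seed, weights.size());                       // weight byte count
    for (uint8_t byte : weights)                                     // weight contents
        seed = hash_combine(seed, byte);
    for (const auto& kvp : compile_options)                          // compile options
        seed = hash_combine(seed, std::hash<std::string>{}(kvp.first + kvp.second));
    return std::to_string(seed);
}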
diff --git a/src/inference/src/ie_core.cpp b/src/inference/src/ie_core.cpp index d9c415fd60a..15b04736e9c 100644 --- a/src/inference/src/ie_core.cpp +++ b/src/inference/src/ie_core.cpp @@ -546,6 +546,15 @@ class CoreImpl : public ie::ICore, public std::enable_shared_from_this& config) const { + auto compileConfig = CreateCompileConfig(plugin, deviceFamily, config); + return ie::NetworkCompilationContext::computeHash(modelStr, weights, compileConfig); + } + public: CoreImpl(bool _newAPI) : newAPI(_newAPI) { add_mutex(""); // Register global mutex @@ -884,6 +893,46 @@ public: return {res._ptr, res._so}; } + ie::SoExecutableNetworkInternal LoadNetwork(const std::string& modelStr, + const ie::Blob::CPtr& weights, + const std::string& deviceName, + const std::map& config, + const std::function& val = nullptr) override { + OV_ITT_SCOPE(FIRST_INFERENCE, ie::itt::domains::IE_LT, "Core::LoadNetwork::Memory"); + auto parsed = parseDeviceNameIntoConfig(deviceName, config); + auto plugin = GetCPPPluginByName(parsed._deviceName); + ov::SoPtr res; + + auto cacheManager = + coreConfig.getCacheConfigForDevice(parsed._deviceName, DeviceSupportsCacheDir(plugin), parsed._config) + ._cacheManager; + auto cacheContent = CacheContent{cacheManager}; + if (cacheManager && DeviceSupportsImportExport(plugin)) { + bool loadedFromCache = false; + ov::Tensor tensor = ov::Tensor(); + if (weights) { + tensor = ov::Tensor(element::u8, {weights->byteSize()}, weights->cbuffer().as()); + } + cacheContent.blobId = CalculateMemoryHash(modelStr, tensor, parsed._deviceName, plugin, parsed._config); + auto lock = cacheGuard.getHashLock(cacheContent.blobId); + res = LoadNetworkFromCache(cacheContent, plugin, parsed._config, nullptr, loadedFromCache); + if (!loadedFromCache) { + auto cnnNetwork = ReadNetwork(modelStr, weights); + if (val) { + val(cnnNetwork); + } + res = compile_model_impl(cnnNetwork, plugin, parsed._config, nullptr, cacheContent); + } + } else { + auto cnnNetwork = ReadNetwork(modelStr, weights); + if (val) { + val(cnnNetwork); + } + res = compile_model_impl(cnnNetwork, plugin, parsed._config, nullptr, cacheContent); + } + return {res._ptr, res._so}; + } + ie::SoExecutableNetworkInternal ImportNetwork(std::istream& networkModel, const std::string& deviceName, const std::map& config) override { @@ -2003,6 +2052,20 @@ CompiledModel Core::compile_model(const std::string& modelPath, const std::strin }); } +CompiledModel Core::compile_model(const std::string& model, + const ov::Tensor& weights, + const std::string& deviceName, + const AnyMap& config) { + InferenceEngine::Blob::Ptr blob; + if (weights) { + blob = weights._impl; + } + OV_CORE_CALL_STATEMENT({ + auto exec = _impl->LoadNetwork(model, blob, deviceName, any_copy(flatten_sub_properties(deviceName, config))); + return {exec._ptr, exec._so}; + }); +} + CompiledModel Core::compile_model(const std::shared_ptr& model, const RemoteContext& context, const AnyMap& config) { diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp index f7d16e03d30..32e1b714cb6 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp @@ -173,4 +173,10 @@ namespace { ::testing::ValuesIn(TestTargets), ::testing::ValuesIn(LoadFromFileConfigs)), 
CompileModelLoadFromFileTestBase::getTestCaseName); + + INSTANTIATE_TEST_SUITE_P(smoke_Auto_CachingSupportCase_CPU, + CompileModelLoadFromMemoryTestBase, + ::testing::Combine(::testing::ValuesIn(TestTargets), + ::testing::ValuesIn(LoadFromFileConfigs)), + CompileModelLoadFromMemoryTestBase::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp index 1dc86363995..f79b45b8ab8 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp @@ -83,6 +83,12 @@ namespace { ::testing::ValuesIn(LoadFromFileConfigs)), CompileModelLoadFromFileTestBase::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_Auto_CachingSupportCase_GPU, + CompileModelLoadFromMemoryTestBase, + ::testing::Combine(::testing::ValuesIn(TestTargets), + ::testing::ValuesIn(LoadFromFileConfigs)), + CompileModelLoadFromMemoryTestBase::getTestCaseName); + const std::vector GPULoadFromFileConfigs = { {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)}, {ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)}, @@ -94,4 +100,9 @@ namespace { ::testing::ValuesIn(GPULoadFromFileConfigs)), CompileModelLoadFromFileTestBase::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_CachingSupportCase_GPU, + CompileModelLoadFromMemoryTestBase, + ::testing::Combine(::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::ValuesIn(GPULoadFromFileConfigs)), + CompileModelLoadFromMemoryTestBase::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/shared/include/behavior/ov_executable_network/exec_network_base.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_executable_network/exec_network_base.hpp index d24ea754d35..c43ef0b61f4 100644 --- a/src/tests/functional/plugin/shared/include/behavior/ov_executable_network/exec_network_base.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/ov_executable_network/exec_network_base.hpp @@ -84,6 +84,64 @@ TEST_P(OVExecutableNetworkBaseTest, canLoadCorrectNetworkToGetExecutable) { EXPECT_NO_THROW(auto execNet = core->compile_model(function, target_device, configuration)); } +TEST_P(OVExecutableNetworkBaseTest, canLoadNetworkFromMemory) { + std::string model = R"V0G0N( + + + + + + + 1 + 3 + 22 + 22 + + + + + + + + 1 + 3 + 22 + 22 + + + + + 1 + 3 + 22 + 22 + + + + + + + 1 + 3 + 22 + 22 + + + + + + + + + + )V0G0N"; + + if (target_device.find("GNA") != std::string::npos) { + GTEST_SKIP(); + } + EXPECT_NO_THROW(auto execNet = core->compile_model(model, ov::Tensor(), target_device, configuration)); +} + TEST(OVExecutableNetworkBaseTest, smoke_LoadNetworkToDefaultDeviceNoThrow) { SKIP_IF_CURRENT_TEST_IS_DISABLED() std::shared_ptr core = utils::PluginCache::get().core(); diff --git a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/caching_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/caching_tests.hpp index 4875d2fedaf..4081f95f4eb 100644 --- a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/caching_tests.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/caching_tests.hpp @@ -82,6 +82,27 @@ public: void run() override; }; +using compileModelLoadFromMemoryParams = std::tuple; +class CompileModelLoadFromMemoryTestBase : public testing::WithParamInterface, + virtual 
public SubgraphBaseTest, + virtual public OVPluginTestBase { + std::string m_cacheFolderName; + std::string m_modelName; + std::string m_weightsName; + std::string m_model; + ov::Tensor m_weights; + std::vector weights_vector; + +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + + void SetUp() override; + void TearDown() override; + void run() override; +}; + using compileKernelsCacheParams = std::tuple< std::string, // device name std::pair // device and cache configuration diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp index f48543b3a41..ad84e360be6 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp @@ -321,6 +321,98 @@ TEST_P(CompileModelLoadFromFileTestBase, CanLoadFromFileWithoutExecption) { run(); } +std::string CompileModelLoadFromMemoryTestBase::getTestCaseName( + testing::TestParamInfo obj) { + auto param = obj.param; + auto deviceName = std::get<0>(param); + auto configuration = std::get<1>(param); + std::ostringstream result; + std::replace(deviceName.begin(), deviceName.end(), ':', '.'); + result << "device_name=" << deviceName << "_"; + for (auto& iter : configuration) { + result << "_" << iter.first << "_" << iter.second.as() << "_"; + } + return result.str(); +} + +void CompileModelLoadFromMemoryTestBase::SetUp() { + ovModelWithName funcPair; + std::tie(targetDevice, configuration) = GetParam(); + target_device = targetDevice; + APIBaseTest::SetUp(); + std::stringstream ss; + auto hash = std::hash()(SubgraphBaseTest::GetTestName()); + ss << "testCache_" << std::to_string(hash) << "_" << std::this_thread::get_id() << "_" << GetTimestamp(); + m_modelName = ss.str() + ".xml"; + m_weightsName = ss.str() + ".bin"; + for (auto& iter : configuration) { + ss << "_" << iter.first << "_" << iter.second.as() << "_"; + } + m_cacheFolderName = ss.str(); + core->set_property(ov::cache_dir()); + ngraph::pass::Manager manager; + manager.register_pass(m_modelName, m_weightsName); + manager.run_passes(ngraph::builder::subgraph::makeConvPoolRelu( + {1, 3, 227, 227}, + InferenceEngine::details::convertPrecision(InferenceEngine::Precision::FP32))); + + try { + std::ifstream model_file(m_modelName, std::ios::binary); + std::stringstream ss; + ss << model_file.rdbuf(); + m_model = ss.str(); + } catch (const Exception& ex) { + GTEST_FAIL() << "Can't read xml file from: " << m_modelName << "\nException [" << ex.what() << "]" << std::endl; + } + + try { + std::ifstream weights_file(m_weightsName, std::ios::binary); + weights_file.unsetf(std::ios::skipws); + + weights_file.seekg(0, std::ios::end); + const auto weights_size = static_cast(weights_file.tellg()); + weights_file.seekg(0, std::ios::beg); + + weights_vector.reserve(weights_size); + weights_vector.insert(weights_vector.begin(), + std::istream_iterator(weights_file), + std::istream_iterator()); + m_weights = ov::Tensor(ov::element::u8, {1, 1, 1, weights_size}, weights_vector.data()); + } catch (const Exception& ex) { + GTEST_FAIL() << "Can't read weights file from: " << m_weightsName << "\nException [" << ex.what() << "]" + << std::endl; + } +} + +void CompileModelLoadFromMemoryTestBase::TearDown() { + CommonTestUtils::removeFilesWithExt(m_cacheFolderName, "blob"); + CommonTestUtils::removeFilesWithExt(m_cacheFolderName, "cl_cache"); + CommonTestUtils::removeIRFiles(m_modelName, 
m_weightsName);
+    std::remove(m_cacheFolderName.c_str());
+    core->set_property(ov::cache_dir());
+    APIBaseTest::TearDown();
+    weights_vector.clear();
+}
+
+void CompileModelLoadFromMemoryTestBase::run() {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED();
+    core->set_property(ov::cache_dir(m_cacheFolderName));
+    try {
+        compiledModel = core->compile_model(m_model, m_weights, targetDevice, configuration);
+        inferRequest = compiledModel.create_infer_request();
+        inferRequest.infer();
+    } catch (const Exception& ex) {
+        GTEST_FAIL() << "Can't compile model from memory, source " << m_modelName << "\nException [" << ex.what()
+                     << "]" << std::endl;
+    } catch (...) {
+        GTEST_FAIL() << "Can't compile model from memory, source " << m_modelName << std::endl;
+    }
+}
+
+TEST_P(CompileModelLoadFromMemoryTestBase, CanLoadFromMemoryWithoutException) {
+    run();
+}
+
 std::string CompiledKernelsCacheTest::getTestCaseName(testing::TestParamInfo<compileKernelsCacheParams> obj) {
     auto param = obj.param;
     std::string deviceName;
diff --git a/src/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp b/src/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp
index dc2ce850adc..ae198c37099 100644
--- a/src/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp
+++ b/src/tests/ie_test_utils/unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp
@@ -21,6 +21,13 @@ public:
                      const std::string &,
                      const std::map<std::string, std::string> &,
                      const std::function<void(const InferenceEngine::CNNNetwork&)> &));
+    MOCK_METHOD5(
+        LoadNetwork,
+        InferenceEngine::SoExecutableNetworkInternal(const std::string&,
+                                                     const InferenceEngine::Blob::CPtr&,
+                                                     const std::string&,
+                                                     const std::map<std::string, std::string>&,
+                                                     const std::function<void(const InferenceEngine::CNNNetwork&)>&));
     MOCK_METHOD3(ImportNetwork,
                  InferenceEngine::SoExecutableNetworkInternal(
                      std::istream&, const std::string&, const std::map<std::string, std::string>&));
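For context, a typical call site for the new memory-based overload could look like the sketch below. This is an illustrative usage example rather than part of the patch: the file names, cache directory, and device name are assumptions; only Core::compile_model(model, weights, device_name, properties) and ov::cache_dir come from the API shown above. As the documentation above warns, the compiled model shares the weights tensor's memory, so the buffer backing it must outlive the compiled model.

#include <cstdint>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>

#include <openvino/runtime/core.hpp>

int main() {
    ov::Core core;
    core.set_property(ov::cache_dir("model_cache"));  // enables the hash-based model cache; directory name is illustrative

    // Read the IR text into a string; "model.xml"/"model.bin" are hypothetical paths.
    std::ifstream xml_file("model.xml", std::ios::binary);
    std::stringstream xml_stream;
    xml_stream << xml_file.rdbuf();
    const std::string model_str = xml_stream.str();

    // Read the weights into a caller-owned buffer.
    std::ifstream bin_file("model.bin", std::ios::binary | std::ios::ate);
    std::vector<uint8_t> bin(static_cast<size_t>(bin_file.tellg()));
    bin_file.seekg(0, std::ios::beg);
    bin_file.read(reinterpret_cast<char*>(bin.data()), static_cast<std::streamsize>(bin.size()));

    // The tensor wraps (does not copy) the buffer, so `bin` must stay alive while `compiled` is in use.
    ov::Tensor weights(ov::element::u8, {bin.size()}, bin.data());

    ov::CompiledModel compiled = core.compile_model(model_str, weights, "CPU");
    ov::InferRequest request = compiled.create_infer_request();
    return 0;
}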