Add new compile_model API to support hashing of in-memory models (#14543)
* Add a new compile_model API for the ONNX Runtime OpenVINO EP: allow compile_model() to accept model/weight data directly.
* Minor updates.
* Cache the model if possible.
* Compute the hash based on the model XML and the model weights.
* Fix a typo.
* Change the hash key computation for the model's weights.
* Resolve a test case issue.
* Use a tensor instead of a blob for hash computation.
* Fix a hash computation issue and add more test cases.
* Fix a build issue caused by the data format.
parent b64c1ff20a, commit 246a287c34
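For context, a minimal usage sketch of the new memory-based overload introduced below. The file names, the "CPU" device, and the buffer handling are illustrative assumptions, not code from this change:

    // Sketch only: read an IR model and its weights into memory, then compile
    // directly from those buffers instead of from a file path.
    #include <cstdint>
    #include <fstream>
    #include <sstream>
    #include <vector>
    #include <openvino/openvino.hpp>

    int main() {
        std::ifstream xml_file("model.xml", std::ios::binary);  // hypothetical paths
        std::stringstream xml_stream;
        xml_stream << xml_file.rdbuf();
        const std::string model_str = xml_stream.str();

        std::ifstream bin_file("model.bin", std::ios::binary | std::ios::ate);
        std::vector<uint8_t> bin(static_cast<size_t>(bin_file.tellg()));
        bin_file.seekg(0);
        bin_file.read(reinterpret_cast<char*>(bin.data()), bin.size());

        // The tensor shares this buffer, so keep `bin` alive while the model is in use.
        ov::Tensor weights(ov::element::u8, {bin.size()}, bin.data());

        ov::Core core;
        auto compiled = core.compile_model(model_str, weights, "CPU");  // overload added in this PR
        auto request = compiled.create_infer_request();
        return 0;
    }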
@@ -99,6 +99,27 @@ public:
        const std::map<std::string, std::string>& config,
        const std::function<void(const ie::CNNNetwork&)>& val = nullptr) = 0;

    /**
     * @brief Creates an executable network from model memory.
     *
     * Users can create as many networks as they need and use
     * them simultaneously (up to the limitation of the hardware resources).
     *
     * @param modelStr String data of the model
     * @param weights Model's weights
     * @param deviceName Name of the device to load the network to
     * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load
     * operation
     * @param val Optional callback to perform validation of the loaded CNNNetwork, if ReadNetwork is triggered
     * @return An executable network reference
     */
    virtual ie::SoExecutableNetworkInternal LoadNetwork(
        const std::string& modelStr,
        const ie::Blob::CPtr& weights,
        const std::string& deviceName,
        const std::map<std::string, std::string>& config,
        const std::function<void(const ie::CNNNetwork&)>& val = nullptr) = 0;

    /**
     * @brief Creates an executable network from a previously exported network
     * @param networkModel network model stream
@@ -255,6 +255,44 @@ public:
        return compile_model(model_path, device_name, AnyMap{std::forward<Properties>(properties)...});
    }

    /**
     * @brief Reads a model and creates a compiled model from IR/ONNX/PDPD memory.
     * @param model String with a model in IR/ONNX/PDPD format.
     * @param weights Shared pointer to a constant tensor with weights.
     * Reading ONNX/PDPD models does not support loading weights from the @p weights tensor.
     * @param device_name Name of a device to load a model to.
     * @param properties Optional map of pairs: (property name, property value) relevant only for this load
     * operation.
     * @note The created model object shares the weights with the @p weights object.
     * Thus, do not create @p weights on temporary data that can be freed later, since the model
     * constant data will point to invalid memory.
     * @return A compiled model.
     */
    CompiledModel compile_model(const std::string& model,
                                const ov::Tensor& weights,
                                const std::string& device_name,
                                const AnyMap& properties = {});

    /**
     * @brief Reads a model and creates a compiled model from IR/ONNX/PDPD memory.
     * @param model String with a model in IR/ONNX/PDPD format.
     * @param weights Shared pointer to a constant tensor with weights.
     * Reading ONNX/PDPD models does not support loading weights from the @p weights tensor.
     * @param device_name Name of a device to load a model to.
     * @tparam Properties Should be a pack of `std::pair<std::string, ov::Any>` types.
     * @note The created model object shares the weights with the @p weights object.
     * Thus, do not create @p weights on temporary data that can be freed later, since the model
     * constant data will point to invalid memory.
     * @return A compiled model.
     */
    template <typename... Properties>
    util::EnableIfAllStringAny<CompiledModel, Properties...> compile_model(const std::string& model,
                                                                           const ov::Tensor& weights,
                                                                           const std::string& device_name,
                                                                           Properties&&... properties) {
        return compile_model(model, weights, device_name, AnyMap{std::forward<Properties>(properties)...});
    }

    /**
     * @brief Creates a compiled model from a source model within a specified remote context.
     * @param model Model object acquired from Core::read_model.
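A hedged sketch of the variadic-properties overload declared above, continuing the earlier example; the property values are illustrative and mirror configurations used elsewhere in this PR (ov::cache_dir, ov::hint::performance_mode):

    // Sketch only: same call, but with properties forwarded as a parameter pack.
    auto compiled_gpu = core.compile_model(model_str,
                                           weights,
                                           "GPU",
                                           ov::cache_dir("model_cache"),
                                           ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));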
@@ -140,6 +140,34 @@ std::string NetworkCompilationContext::computeHash(const std::string& modelName,
    return std::to_string(seed);
}

std::string NetworkCompilationContext::computeHash(const std::string& modelStr,
                                                   const ov::Tensor& tensor,
                                                   const std::map<std::string, std::string>& compileOptions) {
    OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - Model Memory");
    uint64_t seed = 0;
    // model string
    seed = hash_combine(seed, modelStr);

    // tensor data
    seed = hash_combine(seed, tensor.get_size());

    auto ptr = static_cast<size_t*>(tensor.data());
    size_t size = tensor.get_size() / sizeof(size_t);
    for (size_t i = 0; i < size; i++)
        seed = hash_combine(seed, ptr[i]);
    auto size_done = size * sizeof(size_t);
    auto ptr_left = static_cast<uint8_t*>(tensor.data()) + size_done;
    size_t size_left = tensor.get_size() - size_done;
    for (size_t i = 0; i < size_left; i++)
        seed = hash_combine(seed, ptr_left[i]);

    // compile options
    for (const auto& kvp : compileOptions) {
        seed = hash_combine(seed, kvp.first + kvp.second);
    }
    return std::to_string(seed);
}

//////////////////////////////////////////////////

CompiledBlobHeader::CompiledBlobHeader() {}
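The hashing above folds the model string, the weight bytes (word-sized chunks first, then the trailing bytes), and the compile options into one seed via hash_combine. The helper's implementation is not part of this diff; a minimal boost-style sketch of such seed folding, given purely as an assumption, would be:

    // Assumed boost-style seed folding; the real hash_combine lives elsewhere in the repository.
    #include <cstdint>
    #include <functional>

    template <typename T>
    uint64_t hash_combine(uint64_t seed, const T& value) {
        return seed ^ (std::hash<T>{}(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2));
    }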
@@ -9,6 +9,10 @@
#include <ostream>
#include <string>

namespace ov {
class Tensor;
}  // namespace ov

namespace InferenceEngine {

class CNNNetwork;
@@ -20,6 +24,9 @@ struct NetworkCompilationContext final {
    static std::string computeHash(const std::string& modelName,
                                   const std::map<std::string, std::string>& compileOptions);
    static std::string computeHash(const std::string& modelStr,
                                   const ov::Tensor& data,
                                   const std::map<std::string, std::string>& compileOptions);
};

class CompiledBlobHeader final {
@@ -546,6 +546,15 @@ class CoreImpl : public ie::ICore, public std::enable_shared_from_this<ie::ICore
        return ie::NetworkCompilationContext::computeHash(modelName, compileConfig);
    }

    std::string CalculateMemoryHash(const std::string& modelStr,
                                    const ov::Tensor& weights,
                                    const std::string& deviceFamily,
                                    const ov::InferencePlugin& plugin,
                                    const std::map<std::string, std::string>& config) const {
        auto compileConfig = CreateCompileConfig(plugin, deviceFamily, config);
        return ie::NetworkCompilationContext::computeHash(modelStr, weights, compileConfig);
    }

public:
    CoreImpl(bool _newAPI) : newAPI(_newAPI) {
        add_mutex("");  // Register global mutex
@@ -884,6 +893,46 @@ public:
        return {res._ptr, res._so};
    }

    ie::SoExecutableNetworkInternal LoadNetwork(const std::string& modelStr,
                                                const ie::Blob::CPtr& weights,
                                                const std::string& deviceName,
                                                const std::map<std::string, std::string>& config,
                                                const std::function<void(const CNNNetwork&)>& val = nullptr) override {
        OV_ITT_SCOPE(FIRST_INFERENCE, ie::itt::domains::IE_LT, "Core::LoadNetwork::Memory");
        auto parsed = parseDeviceNameIntoConfig(deviceName, config);
        auto plugin = GetCPPPluginByName(parsed._deviceName);
        ov::SoPtr<ie::IExecutableNetworkInternal> res;

        auto cacheManager =
            coreConfig.getCacheConfigForDevice(parsed._deviceName, DeviceSupportsCacheDir(plugin), parsed._config)
                ._cacheManager;
        auto cacheContent = CacheContent{cacheManager};
        if (cacheManager && DeviceSupportsImportExport(plugin)) {
            bool loadedFromCache = false;
            ov::Tensor tensor = ov::Tensor();
            if (weights) {
                tensor = ov::Tensor(element::u8, {weights->byteSize()}, weights->cbuffer().as<uint8_t*>());
            }
            cacheContent.blobId = CalculateMemoryHash(modelStr, tensor, parsed._deviceName, plugin, parsed._config);
            auto lock = cacheGuard.getHashLock(cacheContent.blobId);
            res = LoadNetworkFromCache(cacheContent, plugin, parsed._config, nullptr, loadedFromCache);
            if (!loadedFromCache) {
                auto cnnNetwork = ReadNetwork(modelStr, weights);
                if (val) {
                    val(cnnNetwork);
                }
                res = compile_model_impl(cnnNetwork, plugin, parsed._config, nullptr, cacheContent);
            }
        } else {
            auto cnnNetwork = ReadNetwork(modelStr, weights);
            if (val) {
                val(cnnNetwork);
            }
            res = compile_model_impl(cnnNetwork, plugin, parsed._config, nullptr, cacheContent);
        }
        return {res._ptr, res._so};
    }

    ie::SoExecutableNetworkInternal ImportNetwork(std::istream& networkModel,
                                                  const std::string& deviceName,
                                                  const std::map<std::string, std::string>& config) override {
@@ -2003,6 +2052,20 @@ CompiledModel Core::compile_model(const std::string& modelPath, const std::strin
    });
}

CompiledModel Core::compile_model(const std::string& model,
                                  const ov::Tensor& weights,
                                  const std::string& deviceName,
                                  const AnyMap& config) {
    InferenceEngine::Blob::Ptr blob;
    if (weights) {
        blob = weights._impl;
    }
    OV_CORE_CALL_STATEMENT({
        auto exec = _impl->LoadNetwork(model, blob, deviceName, any_copy(flatten_sub_properties(deviceName, config)));
        return {exec._ptr, exec._so};
    });
}

CompiledModel Core::compile_model(const std::shared_ptr<const ov::Model>& model,
                                  const RemoteContext& context,
                                  const AnyMap& config) {
@@ -173,4 +173,10 @@ namespace {
                                             ::testing::ValuesIn(TestTargets),
                                             ::testing::ValuesIn(LoadFromFileConfigs)),
                         CompileModelLoadFromFileTestBase::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Auto_CachingSupportCase_CPU,
                         CompileModelLoadFromMemoryTestBase,
                         ::testing::Combine(::testing::ValuesIn(TestTargets),
                                            ::testing::ValuesIn(LoadFromFileConfigs)),
                         CompileModelLoadFromMemoryTestBase::getTestCaseName);
}  // namespace
@@ -83,6 +83,12 @@ namespace {
                                            ::testing::ValuesIn(LoadFromFileConfigs)),
                         CompileModelLoadFromFileTestBase::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Auto_CachingSupportCase_GPU,
                         CompileModelLoadFromMemoryTestBase,
                         ::testing::Combine(::testing::ValuesIn(TestTargets),
                                            ::testing::ValuesIn(LoadFromFileConfigs)),
                         CompileModelLoadFromMemoryTestBase::getTestCaseName);

const std::vector<ov::AnyMap> GPULoadFromFileConfigs = {
    {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)},
    {ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)},
@@ -94,4 +100,9 @@ namespace {
                                            ::testing::ValuesIn(GPULoadFromFileConfigs)),
                         CompileModelLoadFromFileTestBase::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_CachingSupportCase_GPU,
                         CompileModelLoadFromMemoryTestBase,
                         ::testing::Combine(::testing::Values(CommonTestUtils::DEVICE_GPU),
                                            ::testing::ValuesIn(GPULoadFromFileConfigs)),
                         CompileModelLoadFromMemoryTestBase::getTestCaseName);
}  // namespace
@@ -84,6 +84,64 @@ TEST_P(OVExecutableNetworkBaseTest, canLoadCorrectNetworkToGetExecutable) {
    EXPECT_NO_THROW(auto execNet = core->compile_model(function, target_device, configuration));
}

TEST_P(OVExecutableNetworkBaseTest, canLoadNetworkFromMemory) {
    std::string model = R"V0G0N(
        <net name="Network" version="10">
            <layers>
                <layer name="in1" type="Parameter" id="0" version="opset8">
                    <data element_type="f16" shape="1,3,22,22"/>
                    <output>
                        <port id="0" precision="FP16" names="data">
                            <dim>1</dim>
                            <dim>3</dim>
                            <dim>22</dim>
                            <dim>22</dim>
                        </port>
                    </output>
                </layer>
                <layer name="round" id="1" type="Round" version="opset8">
                    <data mode="half_to_even"/>
                    <input>
                        <port id="1" precision="FP16">
                            <dim>1</dim>
                            <dim>3</dim>
                            <dim>22</dim>
                            <dim>22</dim>
                        </port>
                    </input>
                    <output>
                        <port id="2" precision="FP16" names="r">
                            <dim>1</dim>
                            <dim>3</dim>
                            <dim>22</dim>
                            <dim>22</dim>
                        </port>
                    </output>
                </layer>
                <layer name="output" type="Result" id="2" version="opset8">
                    <input>
                        <port id="0" precision="FP16">
                            <dim>1</dim>
                            <dim>3</dim>
                            <dim>22</dim>
                            <dim>22</dim>
                        </port>
                    </input>
                </layer>
            </layers>
            <edges>
                <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
                <edge from-layer="1" from-port="2" to-layer="2" to-port="0"/>
            </edges>
        </net>
        )V0G0N";

    if (target_device.find("GNA") != std::string::npos) {
        GTEST_SKIP();
    }
    EXPECT_NO_THROW(auto execNet = core->compile_model(model, ov::Tensor(), target_device, configuration));
}

TEST(OVExecutableNetworkBaseTest, smoke_LoadNetworkToDefaultDeviceNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    std::shared_ptr<ov::Core> core = utils::PluginCache::get().core();
@@ -82,6 +82,27 @@ public:
    void run() override;
};

using compileModelLoadFromMemoryParams = std::tuple<std::string,  // device name
                                                    ov::AnyMap    // device configuration
                                                    >;
class CompileModelLoadFromMemoryTestBase : public testing::WithParamInterface<compileModelLoadFromMemoryParams>,
                                           virtual public SubgraphBaseTest,
                                           virtual public OVPluginTestBase {
    std::string m_cacheFolderName;
    std::string m_modelName;
    std::string m_weightsName;
    std::string m_model;
    ov::Tensor m_weights;
    std::vector<std::uint8_t> weights_vector;

public:
    static std::string getTestCaseName(testing::TestParamInfo<compileModelLoadFromMemoryParams> obj);

    void SetUp() override;
    void TearDown() override;
    void run() override;
};

using compileKernelsCacheParams = std::tuple<
        std::string,                        // device name
        std::pair<ov::AnyMap, std::string>  // device and cache configuration
@@ -321,6 +321,98 @@ TEST_P(CompileModelLoadFromFileTestBase, CanLoadFromFileWithoutExecption) {
    run();
}

std::string CompileModelLoadFromMemoryTestBase::getTestCaseName(
    testing::TestParamInfo<compileModelLoadFromMemoryParams> obj) {
    auto param = obj.param;
    auto deviceName = std::get<0>(param);
    auto configuration = std::get<1>(param);
    std::ostringstream result;
    std::replace(deviceName.begin(), deviceName.end(), ':', '.');
    result << "device_name=" << deviceName << "_";
    for (auto& iter : configuration) {
        result << "_" << iter.first << "_" << iter.second.as<std::string>() << "_";
    }
    return result.str();
}

void CompileModelLoadFromMemoryTestBase::SetUp() {
    ovModelWithName funcPair;
    std::tie(targetDevice, configuration) = GetParam();
    target_device = targetDevice;
    APIBaseTest::SetUp();
    std::stringstream ss;
    auto hash = std::hash<std::string>()(SubgraphBaseTest::GetTestName());
    ss << "testCache_" << std::to_string(hash) << "_" << std::this_thread::get_id() << "_" << GetTimestamp();
    m_modelName = ss.str() + ".xml";
    m_weightsName = ss.str() + ".bin";
    for (auto& iter : configuration) {
        ss << "_" << iter.first << "_" << iter.second.as<std::string>() << "_";
    }
    m_cacheFolderName = ss.str();
    core->set_property(ov::cache_dir());
    ngraph::pass::Manager manager;
    manager.register_pass<ov::pass::Serialize>(m_modelName, m_weightsName);
    manager.run_passes(ngraph::builder::subgraph::makeConvPoolRelu(
        {1, 3, 227, 227},
        InferenceEngine::details::convertPrecision(InferenceEngine::Precision::FP32)));

    try {
        std::ifstream model_file(m_modelName, std::ios::binary);
        std::stringstream ss;
        ss << model_file.rdbuf();
        m_model = ss.str();
    } catch (const Exception& ex) {
        GTEST_FAIL() << "Can't read xml file from: " << m_modelName << "\nException [" << ex.what() << "]" << std::endl;
    }

    try {
        std::ifstream weights_file(m_weightsName, std::ios::binary);
        weights_file.unsetf(std::ios::skipws);

        weights_file.seekg(0, std::ios::end);
        const auto weights_size = static_cast<std::size_t>(weights_file.tellg());
        weights_file.seekg(0, std::ios::beg);

        weights_vector.reserve(weights_size);
        weights_vector.insert(weights_vector.begin(),
                              std::istream_iterator<std::uint8_t>(weights_file),
                              std::istream_iterator<std::uint8_t>());
        m_weights = ov::Tensor(ov::element::u8, {1, 1, 1, weights_size}, weights_vector.data());
    } catch (const Exception& ex) {
        GTEST_FAIL() << "Can't read weights file from: " << m_weightsName << "\nException [" << ex.what() << "]"
                     << std::endl;
    }
}

void CompileModelLoadFromMemoryTestBase::TearDown() {
    CommonTestUtils::removeFilesWithExt(m_cacheFolderName, "blob");
    CommonTestUtils::removeFilesWithExt(m_cacheFolderName, "cl_cache");
    CommonTestUtils::removeIRFiles(m_modelName, m_weightsName);
    std::remove(m_cacheFolderName.c_str());
    core->set_property(ov::cache_dir());
    APIBaseTest::TearDown();
    weights_vector.clear();
}

void CompileModelLoadFromMemoryTestBase::run() {
    SKIP_IF_CURRENT_TEST_IS_DISABLED();
    core->set_property(ov::cache_dir(m_cacheFolderName));
    try {
        compiledModel = core->compile_model(m_model, m_weights, targetDevice, configuration);
        inferRequest = compiledModel.create_infer_request();
        inferRequest.infer();
    } catch (const Exception& ex) {
        GTEST_FAIL() << "Can't loadNetwork with model path " << m_modelName << "\nException [" << ex.what() << "]"
                     << std::endl;
    } catch (...) {
        GTEST_FAIL() << "Can't compile network with model path " << m_modelName << std::endl;
    }
}

TEST_P(CompileModelLoadFromMemoryTestBase, CanLoadFromMemoryWithoutExecption) {
    run();
}

std::string CompiledKernelsCacheTest::getTestCaseName(testing::TestParamInfo<compileKernelsCacheParams> obj) {
    auto param = obj.param;
    std::string deviceName;
@@ -21,6 +21,13 @@ public:
                    const std::string &,
                    const std::map<std::string, std::string> &,
                    const std::function<void(const InferenceEngine::CNNNetwork&)> &));
    MOCK_METHOD5(
        LoadNetwork,
        InferenceEngine::SoExecutableNetworkInternal(const std::string&,
                                                     const InferenceEngine::Blob::CPtr&,
                                                     const std::string&,
                                                     const std::map<std::string, std::string>&,
                                                     const std::function<void(const InferenceEngine::CNNNetwork&)>&));

    MOCK_METHOD3(ImportNetwork, InferenceEngine::SoExecutableNetworkInternal(
        std::istream&, const std::string&, const std::map<std::string, std::string>&));