From 7cea7dd4e6a1c78efa6a5f570939e28a44b547a7 Mon Sep 17 00:00:00 2001
From: Mikhail Nosov
Date: Wed, 16 Mar 2022 12:22:33 +0300
Subject: [PATCH] Docs: model caching page update according to OpenVINO API 2.0 (#10981)

---
 docs/OV_Runtime_UG/Model_caching_overview.md | 165 +++++++++----------
 docs/img/caching_enabled.png                 |   4 +-
 docs/img/caching_times.png                   |   4 +-
 docs/snippets/InferenceEngine_Caching0.cpp   |  17 --
 docs/snippets/InferenceEngine_Caching1.cpp   |  13 --
 docs/snippets/InferenceEngine_Caching2.cpp   |  14 --
 docs/snippets/InferenceEngine_Caching3.cpp   |  20 ---
 docs/snippets/ov_caching.cpp                 |  69 ++++++++
 docs/snippets/ov_caching.py                  |  36 ++++
 9 files changed, 184 insertions(+), 158 deletions(-)
 delete mode 100644 docs/snippets/InferenceEngine_Caching0.cpp
 delete mode 100644 docs/snippets/InferenceEngine_Caching1.cpp
 delete mode 100644 docs/snippets/InferenceEngine_Caching2.cpp
 delete mode 100644 docs/snippets/InferenceEngine_Caching3.cpp
 create mode 100644 docs/snippets/ov_caching.cpp
 create mode 100644 docs/snippets/ov_caching.py

diff --git a/docs/OV_Runtime_UG/Model_caching_overview.md b/docs/OV_Runtime_UG/Model_caching_overview.md
index 074f471b4de..70505abfc4a 100644
--- a/docs/OV_Runtime_UG/Model_caching_overview.md
+++ b/docs/OV_Runtime_UG/Model_caching_overview.md
@@ -1,59 +1,95 @@
 # Model Caching Overview {#openvino_docs_IE_DG_Model_caching_overview}
 
-## Introduction (C++)
+## Introduction
 
-@sphinxdirective
-.. raw:: html
+As described in the [Integrate OpenVINO™ with Your Application](integrate_with_your_application.md), a common application flow consists of the following steps:
 
-    <div id="switcher-cpp" class="switcher-anchor">C++</div>
-
-@endsphinxdirective
+1. **Create a Core object**: First step to manage available devices and read model objects
 
-As described in the [OpenVINO™ Runtime User Guide](openvino_intro.md), a common application flow consists of the following steps:
-
-1. **Create a Core object**: First step to manage available devices and read network objects
-
-2. **Read the Intermediate Representation**: Read an Intermediate Representation file into an object of the `InferenceEngine::CNNNetwork`
+2. **Read the Intermediate Representation**: Read an Intermediate Representation file into an object of the `ov::Model`
 
 3. **Prepare inputs and outputs**: If needed, manipulate precision, memory layout, size or color format
 
 4. **Set configuration**: Pass device-specific loading configurations to the device
 
-5. **Compile and Load Network to device**: Use the `InferenceEngine::Core::LoadNetwork()` method with a specific device
+5. **Compile and Load Network to device**: Use the `ov::Core::compile_model()` method with a specific device
 
-6. **Set input data**: Specify input blob
+6. **Set input data**: Specify input tensor
 
 7. **Execute**: Carry out inference and process results
 
 Step 5 can potentially perform several time-consuming device-specific optimizations and network compilations, and such delays can lead to a bad user experience on application startup. To avoid this, some devices offer import/export network capability, and it is possible to either use the [Compile tool](../../tools/compile_tool/README.md)
-or enable model caching to export compiled network automatically. Reusing cached networks can significantly reduce load network time.
+or enable model caching to export the compiled model automatically. Reusing a cached model can significantly reduce model compile time.
 
-### Set "CACHE_DIR" config option to enable model caching
+### Set "cache_dir" config option to enable model caching
 
 To enable model caching, the application must specify a folder to store cached blobs, which is done like this:
 
-@snippet snippets/InferenceEngine_Caching0.cpp part0
+@sphinxdirective
 
-With this code, if the device specified by `LoadNetwork` supports import/export network capability, a cached blob is automatically created inside the `myCacheFolder` folder.
-CACHE_DIR config is set to the Core object. If the device does not support import/export capability, cache is not created and no error is thrown.
+.. tab:: C++
 
-Depending on your device, total time for loading network on application startup can be significantly reduced.
-Also note that the very first LoadNetwork (when cache is not yet created) takes slightly longer time to "export" the compiled blob into a cache file:
+    .. doxygensnippet:: docs/snippets/ov_caching.cpp
+       :language: cpp
+       :fragment: [ov:caching:part0]
+
+.. tab:: Python
+
+    .. doxygensnippet:: docs/snippets/ov_caching.py
+       :language: python
+       :fragment: [ov:caching:part0]
+
+@endsphinxdirective
+
+With this code, if the device specified by `device_name` supports import/export model capability, a cached blob is automatically created inside the `/path/to/cache/dir` folder.
+If the device does not support import/export capability, the cache is not created and no error is thrown.
+
+Depending on your device, the total time for compiling a model on application startup can be significantly reduced.
+Also note that the very first `compile_model` (when cache is not yet created) takes slightly longer time to "export" the compiled blob into a cache file:
 
 ![caching_enabled]
 
-### Even faster: use LoadNetwork(modelPath)
+### Even faster: use compile_model(modelPath)
 
-In some cases, applications do not need to customize inputs and outputs every time. Such an application always
-call `cnnNet = ie.ReadNetwork(...)`, then `ie.LoadNetwork(cnnNet, ..)` and it can be further optimized.
-For these cases, the 2021.4 release introduces a more convenient API to load the network in a single call, skipping the export step:
+In some cases, applications do not need to customize inputs and outputs every time. Such applications always
+call `model = core.read_model(...)`, then `core.compile_model(model, ..)`, and this can be further optimized.
+For these cases, there is a more convenient API to compile the model in a single call, skipping the read step:
 
-@snippet snippets/InferenceEngine_Caching1.cpp part1
+@sphinxdirective
 
-With model caching enabled, total load time is even smaller, if ReadNetwork is optimized as well.
+.. tab:: C++
 
-@snippet snippets/InferenceEngine_Caching2.cpp part2
+    .. doxygensnippet:: docs/snippets/ov_caching.cpp
+       :language: cpp
+       :fragment: [ov:caching:part1]
+
+.. tab:: Python
+
+    .. doxygensnippet:: docs/snippets/ov_caching.py
+       :language: python
+       :fragment: [ov:caching:part1]
+
+@endsphinxdirective
+
+With model caching enabled, the total load time is even smaller, if `read_model` is optimized as well.
+
+@sphinxdirective
+
+.. tab:: C++
+
+    .. doxygensnippet:: docs/snippets/ov_caching.cpp
+       :language: cpp
+       :fragment: [ov:caching:part2]
+
+.. tab:: Python
+
+    .. doxygensnippet:: docs/snippets/ov_caching.py
+       :language: python
+       :fragment: [ov:caching:part2]
+
+@endsphinxdirective
 
 ![caching_times]
 
@@ -62,74 +98,23 @@ With model caching enabled, total load time is even smaller, if ReadNetwork is o
 ### Advanced Examples
 
 Not every device supports network import/export capability. For those that don't, enabling caching has no effect.
 To check in advance if a particular device supports model caching, your application can use the following code:
 
-@snippet snippets/InferenceEngine_Caching3.cpp part3
-
-## Introduction (Python)
-
 @sphinxdirective
-.. raw:: html
 
-    <div id="switcher-python" class="switcher-anchor">Python</div>
-
+.. tab:: C++
+
+    .. doxygensnippet:: docs/snippets/ov_caching.cpp
+       :language: cpp
+       :fragment: [ov:caching:part3]
+
+.. tab:: Python
+
+    .. doxygensnippet:: docs/snippets/ov_caching.py
+       :language: python
+       :fragment: [ov:caching:part3]
+
 @endsphinxdirective
 
-As described in OpenVINO User Guide, a common application flow consists of the following steps:
-
-1. **Create a Core Object**
-2. **Read the Intermediate Representation** - Read an Intermediate Representation file into an object of the [ie_api.IENetwork](api/ie_python_api/_autosummary/openvino.inference_engine.IENetwork.html)
-3. **Prepare inputs and outputs**
-4. **Set configuration** - Pass device-specific loading configurations to the device
-5. **Compile and Load Network to device** - Use the `IECore.load_network()` method and specify the target device
-6. **Set input data**
-7. **Execute the model** - Run inference
-
-Step #5 can potentially perform several time-consuming device-specific optimizations and network compilations, and such delays can lead to bad user experience on application startup. To avoid this, some devices offer Import/Export network capability, and it is possible to either use the [Compile tool](../../tools/compile_tool/README.md) or enable model caching to export the compiled network automatically. Reusing cached networks can significantly reduce load network time.
-
-### Set the “CACHE_DIR” config option to enable model caching
-
-To enable model caching, the application must specify the folder where to store cached blobs. It can be done using [IECore.set_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.set_config).
-
-``` python
-from openvino.inference_engine import IECore
-
-ie = IECore()
-ie.set_config(config={"CACHE_DIR": path_to_cache}, device_name=device)
-net = ie.read_network(model=path_to_xml_file)
-exec_net = ie.load_network(network=net, device_name=device)
-```
-
-With this code, if a device supports the Import/Export network capability, a cached blob is automatically created inside the path_to_cache directory `CACHE_DIR` config is set to the Core object. If device does not support Import/Export capability, cache is just not created and no error is thrown
-
-Depending on your device, total time for loading network on application startup can be significantly reduced. Please also note that very first [IECore.load_network](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.load_network) (when the cache is not yet created) takes slightly longer time to ‘export’ the compiled blob into a cache file.
-
-![caching_enabled]
-
-
-### Even Faster: Use IECore.load_network(path_to_xml_file)
-
-In some cases, applications do not need to customize inputs and outputs every time. These applications always call [IECore.read_network](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.read_network), then `IECore.load_network(model=path_to_xml_file)` and may be further optimized.
-For such cases, it's more convenient to load the network in a single call to `ie.load_network()`
-A model can be loaded directly to the device, with model caching enabled:
-
-``` python
-from openvino.inference_engine import IECore
-
-ie = IECore()
-ie.set_config(config={"CACHE_DIR" : path_to_cache}, device_name=device)
-ie.load_network(network=path_to_xml_file, device_name=device)
-```
-
-![caching_times]
-
-### Advanced Examples
-
-Not every device supports network import/export capability, enabling of caching for such devices does not have any effect. To check in advance if a particular device supports model caching, your application can use the following code:
-
-```python
-all_metrics = ie.get_metric(device_name=device, metric_name="SUPPORTED_METRICS")
-# Find the 'IMPORT_EXPORT_SUPPORT' metric in supported metrics
-allows_caching = "IMPORT_EXPORT_SUPPORT" in all_metrics
-```
-
-> **NOTE**: The GPU plugin does not have the IMPORT_EXPORT_SUPPORT capability, and does not support model caching yet. However, the GPU plugin supports caching kernels (see the [GPU plugin documentation](supported_plugins/GPU.md)). Kernel caching for the GPU plugin can be accessed the same way as model caching: by setting the `CACHE_DIR` configuration key to a folder where the cache should be stored.
+> **NOTE**: The GPU plugin does not have the EXPORT_IMPORT capability, and does not support model caching yet. However, the GPU plugin supports caching kernels (see the [GPU plugin documentation](supported_plugins/GPU.md)). Kernel caching for the GPU plugin can be accessed the same way as model caching: by setting the `CACHE_DIR` configuration key to a folder where the cache should be stored.
 
 [caching_enabled]: ../img/caching_enabled.png
diff --git a/docs/img/caching_enabled.png b/docs/img/caching_enabled.png
index f8a898764e1..2cc080c118b 100644
--- a/docs/img/caching_enabled.png
+++ b/docs/img/caching_enabled.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:488a7a47e5086a6868c22219bc9d58a3508059e5a1dc470f2653a12552dea82f
-size 36207
+oid sha256:ecf560b08b921da29d59a3c1f6332d092a0575dd00cf59806dc801c32a10790f
+size 120241
diff --git a/docs/img/caching_times.png b/docs/img/caching_times.png
index 11d9c8b088f..fa67a63f3fc 100644
--- a/docs/img/caching_times.png
+++ b/docs/img/caching_times.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2eed189f9cb3d30fe13b4ba4515edd4e6da5d01545660e65fa8a33d945967281
-size 28894
+oid sha256:357483dd3460848e98489073cd9d58b5c8ada9ec3df4fbfd0956ba9e779f9c15
+size 79843
diff --git a/docs/snippets/InferenceEngine_Caching0.cpp b/docs/snippets/InferenceEngine_Caching0.cpp
deleted file mode 100644
index 5311a3d0bb6..00000000000
--- a/docs/snippets/InferenceEngine_Caching0.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-#include <ie_core.hpp>
-
-int main() {
-using namespace InferenceEngine;
-    std::string modelPath = "/tmp/myModel.xml";
-    std::string device = "GNA";
-    std::map<std::string, std::string> deviceConfig;
-//! [part0]
-    InferenceEngine::Core ie;                                  // Step 1: create Inference engine object
-    ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "myCacheFolder"}});  // Step 1b: Enable caching
-    auto cnnNet = ie.ReadNetwork(modelPath);                   // Step 2: ReadNetwork
-    //...                                                      // Step 3: Prepare inputs/outputs
-    //...                                                      // Step 4: Set device configuration
-    ie.LoadNetwork(cnnNet, device, deviceConfig);              // Step 5: LoadNetwork
-//! [part0]
-return 0;
-}
diff --git a/docs/snippets/InferenceEngine_Caching1.cpp b/docs/snippets/InferenceEngine_Caching1.cpp
deleted file mode 100644
index 3c9d0c5b22d..00000000000
--- a/docs/snippets/InferenceEngine_Caching1.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <ie_core.hpp>
-
-int main() {
-using namespace InferenceEngine;
-    std::string modelPath = "/tmp/myModel.xml";
-    std::string device = "GNA";
-    std::map<std::string, std::string> deviceConfig;
-//! [part1]
-    InferenceEngine::Core ie;                         // Step 1: create Inference engine object
-    ie.LoadNetwork(modelPath, device, deviceConfig);  // Step 2: LoadNetwork by model file path
-//! [part1]
-return 0;
-}
diff --git a/docs/snippets/InferenceEngine_Caching2.cpp b/docs/snippets/InferenceEngine_Caching2.cpp
deleted file mode 100644
index aaf4b33c10d..00000000000
--- a/docs/snippets/InferenceEngine_Caching2.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <ie_core.hpp>
-
-int main() {
-using namespace InferenceEngine;
-    std::string modelPath = "/tmp/myModel.xml";
-    std::string device = "GNA";
-    std::map<std::string, std::string> deviceConfig;
-//! [part2]
-    InferenceEngine::Core ie;                                  // Step 1: create Inference engine object
-    ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "myCacheFolder"}});  // Step 1b: Enable caching
-    ie.LoadNetwork(modelPath, device, deviceConfig);           // Step 2: LoadNetwork by model file path
-//! [part2]
-return 0;
-}
diff --git a/docs/snippets/InferenceEngine_Caching3.cpp b/docs/snippets/InferenceEngine_Caching3.cpp
deleted file mode 100644
index db6cd89e5c6..00000000000
--- a/docs/snippets/InferenceEngine_Caching3.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-#include <ie_core.hpp>
-
-int main() {
-using namespace InferenceEngine;
-    std::string modelPath = "/tmp/myModel.xml";
-    std::string deviceName = "GNA";
-    std::map<std::string, std::string> deviceConfig;
-    InferenceEngine::Core ie;
-//! [part3]
-    // Get list of supported metrics
-    std::vector<std::string> keys = ie.GetMetric(deviceName, METRIC_KEY(SUPPORTED_METRICS));
-
-    // Find 'IMPORT_EXPORT_SUPPORT' metric in supported metrics
-    auto it = std::find(keys.begin(), keys.end(), METRIC_KEY(IMPORT_EXPORT_SUPPORT));
-
-    // If metric 'IMPORT_EXPORT_SUPPORT' exists, check it's value
-    auto cachingSupported = (it != keys.end()) && ie.GetMetric(deviceName, METRIC_KEY(IMPORT_EXPORT_SUPPORT)).as<bool>();
-//! [part3]
-    return 0;
-}
diff --git a/docs/snippets/ov_caching.cpp b/docs/snippets/ov_caching.cpp
new file mode 100644
index 00000000000..b4221f3473b
--- /dev/null
+++ b/docs/snippets/ov_caching.cpp
@@ -0,0 +1,69 @@
+#include <openvino/runtime/core.hpp>
+
+void part0() {
+    std::string modelPath = "/tmp/myModel.xml";
+    std::string device = "GNA";
+    ov::AnyMap config;
+//! [ov:caching:part0]
+ov::Core core;                                              // Step 1: create ov::Core object
+core.set_property(ov::cache_dir("/path/to/cache/dir"));     // Step 1b: Enable caching
+auto model = core.read_model(modelPath);                    // Step 2: Read Model
+//...                                                       // Step 3: Prepare inputs/outputs
+//...                                                       // Step 4: Set device configuration
+auto compiled = core.compile_model(model, device, config);  // Step 5: LoadNetwork
+//! [ov:caching:part0]
+    if (!compiled) {
+        throw std::runtime_error("error");
+    }
+}
+
+void part1() {
+    std::string modelPath = "/tmp/myModel.xml";
+    std::string device = "GNA";
+    ov::AnyMap config;
+//! [ov:caching:part1]
+ov::Core core;                                                  // Step 1: create ov::Core object
+auto compiled = core.compile_model(modelPath, device, config);  // Step 2: Compile model by file path
+//! [ov:caching:part1]
+    if (!compiled) {
+        throw std::runtime_error("error");
+    }
+}
+
+void part2() {
+    std::string modelPath = "/tmp/myModel.xml";
+    std::string device = "GNA";
+    ov::AnyMap config;
+//! [ov:caching:part2]
+ov::Core core;                                                  // Step 1: create ov::Core object
+core.set_property(ov::cache_dir("/path/to/cache/dir"));         // Step 1b: Enable caching
+auto compiled = core.compile_model(modelPath, device, config);  // Step 2: Compile model by file path
+//! [ov:caching:part2]
+    if (!compiled) {
+        throw std::runtime_error("error");
+    }
+}
+
+void part3() {
+    std::string deviceName = "GNA";
+    ov::AnyMap config;
+    ov::Core core;
+//! [ov:caching:part3]
+// Get list of supported device capabilities
+std::vector<std::string> caps = core.get_property(deviceName, ov::device::capabilities);
+
+// Find 'EXPORT_IMPORT' capability in supported capabilities
+bool cachingSupported = std::find(caps.begin(), caps.end(), ov::device::capability::EXPORT_IMPORT) != caps.end();
+//! [ov:caching:part3]
+    if (!cachingSupported) {
+        throw std::runtime_error("GNA should support model caching");
+    }
+}
+
+int main() {
+    part0();
+    part1();
+    part2();
+    part3();
+    return 0;
+}
\ No newline at end of file
diff --git a/docs/snippets/ov_caching.py b/docs/snippets/ov_caching.py
new file mode 100644
index 00000000000..3aa400fe1fa
--- /dev/null
+++ b/docs/snippets/ov_caching.py
@@ -0,0 +1,36 @@
+# Copyright (C) 2018-2022 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from openvino.runtime import Core
+
+device_name = 'GNA'
+xml_path = '/tmp/myModel.xml'
+# ! [ov:caching:part0]
+core = Core()
+core.set_property({'CACHE_DIR': '/path/to/cache/dir'})
+model = core.read_model(model=xml_path)
+compiled_model = core.compile_model(model=model, device_name=device_name)
+# ! [ov:caching:part0]
+
+assert compiled_model
+
+# ! [ov:caching:part1]
+core = Core()
+compiled_model = core.compile_model(model_path=xml_path, device_name=device_name)
+# ! [ov:caching:part1]
+
+assert compiled_model
+
+# ! [ov:caching:part2]
+core = Core()
+core.set_property({'CACHE_DIR': '/path/to/cache/dir'})
+compiled_model = core.compile_model(model_path=xml_path, device_name=device_name)
+# ! [ov:caching:part2]
+
+assert compiled_model
+
+# ! [ov:caching:part3]
+# Find 'EXPORT_IMPORT' capability in supported capabilities
+caching_supported = 'EXPORT_IMPORT' in core.get_property(device_name, 'OPTIMIZATION_CAPABILITIES')
+# ! [ov:caching:part3]
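For completeness, here is a hedged sketch (not part of the commit) of the seven-step flow described in the updated page with caching enabled, including the input and inference steps that the `ov_caching.cpp` snippets leave out. It assumes a single-input, single-output model, placeholder paths, and the `"CPU"` device; the infer-request calls are standard OpenVINO 2.0 API but do not appear anywhere in the patch.

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;                                           // Step 1: create ov::Core
    core.set_property(ov::cache_dir("/path/to/cache/dir"));  // enable model caching
    auto model = core.read_model("/path/to/model.xml");      // Step 2: read the model
    // Steps 3-4: input/output preparation and device configuration would go here.
    auto compiled = core.compile_model(model, "CPU");        // Step 5: compile for a device
    auto request = compiled.create_infer_request();
    ov::Tensor input = request.get_input_tensor();           // Step 6: set input data
    // ... fill `input` with application data ...
    request.infer();                                         // Step 7: run inference
    ov::Tensor output = request.get_output_tensor();         //         ... and read results
    return 0;
}
```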