Compare commits
72 Commits: 2023.2.0.d...releases/2
| SHA1 |
|---|
| 2eb6fbcca1 |
| 1bf8a41ff6 |
| c5f7ad383e |
| cccff7fe0d |
| b33800a61c |
| 320887b424 |
| 5f2e584231 |
| ba35364a53 |
| 1c84064e06 |
| c370284bc4 |
| 81b0aec201 |
| f37e14c614 |
| 7fe8264703 |
| a5cfe0ecb2 |
| 22cf9efcdc |
| 1fdc9e372f |
| 18e83a2177 |
| ef892a01df |
| 7965797103 |
| 28be0a0452 |
| 8db14b987c |
| 9910725a7b |
| c72f2ef88c |
| a135ab64af |
| 9a7e939a6f |
| f9075c808a |
| 60059f2c75 |
| 58012dcd71 |
| 71f27da6a8 |
| 35a898ab85 |
| a5d9f96efd |
| 1ef890329c |
| a86ae42aed |
| cef0696ef7 |
| e57a96474d |
| ed052022d3 |
| 6eda5c39c6 |
| 0100810dd6 |
| 882e377ef9 |
| c9d5d95e2c |
| d77bc36dcd |
| 19e1b6002e |
| 6bcd0f6072 |
| 0e8534a4a9 |
| 08d7c3e75f |
| 821d513150 |
| 38a48b9cbf |
| c6d8905a88 |
| 3a80652d70 |
| 120d3a596d |
| 25af83db81 |
| 03c6f4e3fe |
| 5d3d323bed |
| a53524a554 |
| 02d2dbd0fa |
| bfe0748b4c |
| d78577aecb |
| e09f0e4808 |
| ff73955354 |
| 18cb230af4 |
| 9067a25616 |
| c4ff0ffa9d |
| 4675a12c8f |
| 3cd5da0797 |
| 9b402f226f |
| 784adca70a |
| 8e1603f7fd |
| 66ede40e4e |
| 40a29a7aa3 |
| a7e00dae54 |
| 4c40494605 |
| 3e2a4a5df1 |
@@ -4,11 +4,13 @@ resources:
    type: github
    endpoint: openvinotoolkit
    name: openvinotoolkit/openvino_contrib
    ref: releases/2021/3

  - repository: testdata
    type: github
    endpoint: openvinotoolkit
    name: openvinotoolkit/testdata
    ref: releases/2021/3

jobs:
- job: Lin
@@ -64,13 +64,13 @@ jobs:

   - task: CMake@1
     inputs:
-      #-DENABLE_PROFILING_ITT=ON
-      #-DSELECTIVE_BUILD=COLLECT
       cmakeArgs: >
         -GNinja
         -DVERBOSE_BUILD=ON
         -DCMAKE_BUILD_TYPE=$(BUILD_TYPE)
         -DENABLE_FASTER_BUILD=ON
+        -DENABLE_PROFILING_ITT=ON
+        -DSELECTIVE_BUILD=COLLECT
         $(REPO_DIR)
     workingDirectory: $(BUILD_DIR)
@@ -4,11 +4,13 @@ resources:
    type: github
    endpoint: openvinotoolkit
    name: openvinotoolkit/openvino_contrib
    ref: releases/2021/3

  - repository: testdata
    type: github
    endpoint: openvinotoolkit
    name: openvinotoolkit/testdata
    ref: releases/2021/3

jobs:
- job: Mac
@@ -4,11 +4,13 @@ resources:
    type: github
    endpoint: openvinotoolkit
    name: openvinotoolkit/openvino_contrib
    ref: releases/2021/3

  - repository: testdata
    type: github
    endpoint: openvinotoolkit
    name: openvinotoolkit/testdata
    ref: releases/2021/3

jobs:
- job: Win

@@ -36,7 +38,7 @@ jobs:
     SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat
     IB_DIR: C:\Program Files (x86)\IncrediBuild
     IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe
-    TEST_ENV_PATH: $(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.1\opencv\bin;$(IB_DIR);%PATH%
+    TEST_ENV_PATH: $(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.2\opencv\bin;$(IB_DIR);%PATH%

   steps:
   - script: |
.ci/azure/windows_conditional_compilation.yml (new file, 89 lines)

@@ -0,0 +1,89 @@
jobs:
- job: WinCC
  # About 150% of total time
  timeoutInMinutes: 120

  pool:
    name: WIN_VMSS_VENV_F8S_WU2

  variables:
    system.debug: true
    VSTS_HTTP_RETRY: 5
    VSTS_HTTP_TIMEOUT: 200
    WORKERS_NUMBER: 8
    BUILD_TYPE: Release
    REPO_DIR: $(Build.Repository.LocalPath)
    OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)\..\openvino_contrib
    MODELS_PATH: $(REPO_DIR)\..\testdata
    WORK_DIR: $(Pipeline.Workspace)\_w
    BUILD_DIR: D:\build
    BIN_DIR: $(REPO_DIR)\bin\intel64
    MSVS_VARS_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat
    MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe
    INSTALL_DIR: $(WORK_DIR)\install_pkg
    SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat
    IB_DIR: C:\Program Files (x86)\IncrediBuild
    IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe
    TEST_ENV_PATH: $(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.2\opencv\bin;$(IB_DIR);%PATH%

  steps:
  - script: |
      powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom"
      where python3
      where python
      python --version
      where java
      java -version
      wmic computersystem get TotalPhysicalMemory
      wmic cpu list
      wmic logicaldisk get description,name
      wmic VOLUME list
      set
    displayName: 'System info'

  - script: |
      rd /Q /S $(WORK_DIR) & mkdir $(WORK_DIR)
      rd /Q /S $(BUILD_DIR) & mkdir $(BUILD_DIR)
    displayName: 'Make dir'

  - script: |
      certutil -urlcache -split -f https://incredibuilddiag1wu2.blob.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
      call install_ib_console.bat
    workingDirectory: $(WORK_DIR)
    displayName: 'Install IncrediBuild'

  - checkout: self
    clean: true
    lfs: false
    submodules: recursive
    path: openvino

  - script: |
      certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-win.zip ninja-win.zip
      powershell -command "Expand-Archive -Force ninja-win.zip"
    workingDirectory: $(WORK_DIR)
    displayName: 'Install dependencies'

  - script: |
      set PATH=$(WORK_DIR)\ninja-win;%PATH%
      call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
    workingDirectory: $(BUILD_DIR)
    displayName: 'CMake'

  - script: |
      set PATH=$(WORK_DIR)\ninja-win;%PATH%
      call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja"
    workingDirectory: $(BUILD_DIR)
    displayName: 'Build Win'

  - script: dir $(REPO_DIR)\bin\ /s
    displayName: 'List files'

  - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake
    workingDirectory: $(BUILD_DIR)
    displayName: 'Install'

  - script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
    displayName: Stop IncrediBuild
    continueOnError: true
    enabled: false
.github/workflows/mo.yml (vendored, 12 changed lines)

@@ -80,7 +80,17 @@ jobs:
           python3 setup.py sdist bdist_wheel
         working-directory: model-optimizer

-      - name: Test
+      - name: Test package content
+        run: |
+          echo "src = open('openvino_mo.egg-info/SOURCES.txt', 'rt').read().split()" | tee -a test_wheel.py
+          echo "ref = open('automation/package_BOM.txt', 'rt').read().split()" | tee -a test_wheel.py
+          echo "for name in ref:" | tee -a test_wheel.py
+          echo "    if name.endswith('.py'):" | tee -a test_wheel.py
+          echo "        assert name in src or './' + name in src, name + ' file missed'" | tee -a test_wheel.py
+          python3 test_wheel.py
+        working-directory: model-optimizer
+
+      - name: Test conversion
         run: |
           wget -q http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz
           tar -xf mobilenet_v1_1.0_224.tgz
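The echo commands in the new "Test package content" step assemble a small Python check script line by line. Reconstructed from the echoed strings above (restoring the indentation the assertions rely on), the generated test_wheel.py is:

```python
# test_wheel.py, as generated by the workflow step above
src = open('openvino_mo.egg-info/SOURCES.txt', 'rt').read().split()
ref = open('automation/package_BOM.txt', 'rt').read().split()
for name in ref:
    if name.endswith('.py'):
        # every .py file listed in the package BOM must be present in the built sources
        assert name in src or './' + name in src, name + ' file missed'
```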
README.md (17 changed lines)

@@ -1,5 +1,5 @@
-# [OpenVINO™ Toolkit](https://01.org/openvinotoolkit) - Deep Learning Deployment Toolkit repository
-[](https://github.com/openvinotoolkit/openvino/releases/tag/2021.2)
+# OpenVINO™ Toolkit
+[](https://github.com/openvinotoolkit/openvino/releases/tag/2021.3)
 [](LICENSE)

@@ -7,7 +7,7 @@
 This toolkit allows developers to deploy pre-trained deep learning models
 through a high-level C++ Inference Engine API integrated with application logic.

-This open source version includes several components: namely [Model Optimizer], [ngraph] and
+This open source version includes several components: namely [Model Optimizer], [nGraph] and
 [Inference Engine], as well as CPU, GPU, MYRIAD, multi device and heterogeneous plugins to accelerate deep learning inferencing on Intel® CPUs and Intel® Processor Graphics.
 It supports pre-trained models from the [Open Model Zoo], along with 100+ open
 source and public models in popular formats such as Caffe\*, TensorFlow\*,

@@ -15,7 +15,7 @@ MXNet\* and ONNX\*.

 ## Repository components:
 * [Inference Engine]
-* [ngraph]
+* [nGraph]
 * [Model Optimizer]

 ## License

@@ -27,9 +27,10 @@ and release your contribution under these terms.
 * Docs: https://docs.openvinotoolkit.org/
 * Wiki: https://github.com/openvinotoolkit/openvino/wiki
 * Issue tracking: https://github.com/openvinotoolkit/openvino/issues
-* Additional OpenVINO modules: https://github.com/openvinotoolkit/openvino_contrib
-* [HomePage](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html)
-* [OpenVINO™ Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes)
+* Storage: https://storage.openvinotoolkit.org/
+* Additional OpenVINO™ modules: https://github.com/openvinotoolkit/openvino_contrib
+* [Intel® Distribution of OpenVINO™ toolkit Product Page](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html)
+* [Intel® Distribution of OpenVINO™ toolkit Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes)

 ## Support
 Please report questions, issues and suggestions using:

@@ -45,4 +46,4 @@ Please report questions, issues and suggestions using:
 [Inference Engine]:https://software.intel.com/en-us/articles/OpenVINO-InferEngine
 [Model Optimizer]:https://software.intel.com/en-us/articles/OpenVINO-ModelOptimizer
 [tag on StackOverflow]:https://stackoverflow.com/search?q=%23openvino
-[ngraph]:https://docs.openvinotoolkit.org/latest/openvino_docs_nGraph_DG_DevGuide.html
+[nGraph]:https://docs.openvinotoolkit.org/latest/openvino_docs_nGraph_DG_DevGuide.html
@@ -337,7 +337,7 @@ operation for the CPU plugin. The code of the library is described in the [Exte

 In order to build the extension run the following:<br>
 ```bash
 mkdir build && cd build
-source /opt/intel/openvino/bin/setupvars.sh
+source /opt/intel/openvino_2021/bin/setupvars.sh
 cmake .. -DCMAKE_BUILD_TYPE=Release
 make --jobs=$(nproc)
 ```

@@ -368,7 +368,7 @@ python3 mri_reconstruction_demo.py \

 - [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md)
 - [Inference Engine Extensibility Mechanism](../IE_DG/Extensibility_DG/Intro.md)
 - [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md)
-- [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index)
+- [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_group_intel)
 - For IoT Libraries and Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit).

 ## Converting Models:
@@ -2,6 +2,19 @@

 The sections below contain detailed list of changes made to the Inference Engine API in recent releases.

+## 2021.3
+
+### New API
+
+* InferenceEngine::InferRequest::Cancel to cancel inference request execution
+* InferenceEngine::Layout::HWC to support HWC layout for input or output blobs
+* InferenceEngine::Precision::F64 data precision for f64 data type
+* InferenceEngine::CNNNetwork::getOVNameForTensor to map frameworks tensor names to OpenVINO internal tensor names
+
+### Deprecated API
+
+* InferenceEngine::IVariableState interface is deprecated, use InferenceEngine::VariableState wrapper
+
 ## 2021.2

 ### New API
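For illustration, a minimal sketch of how the 2021.3 additions listed above fit together; the model path, framework tensor name, and device below are placeholder assumptions, and error handling is omitted:

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core core;
    // Read a network; getOVNameForTensor (new in 2021.3) maps an original
    // framework tensor name to the OpenVINO internal tensor name.
    auto network = core.ReadNetwork("model.xml");
    std::string ovName = network.getOVNameForTensor("framework_tensor_name");

    auto executable = core.LoadNetwork(network, "CPU");
    auto request = executable.CreateInferRequest();

    // Start asynchronous execution, then cancel it (Cancel is new in 2021.3).
    request.StartAsync();
    request.Cancel();
    // Wait for the request to settle before it is destroyed.
    request.Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
    return 0;
}
```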
@@ -1,88 +1,122 @@
 # Inference Engine Developer Guide {#openvino_docs_IE_DG_Deep_Learning_Inference_Engine_DevGuide}

-## Introduction to the OpenVINO™ Toolkit
-> **NOTE:** [Intel® System Studio](https://software.intel.com/en-us/system-studio) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019).
-
-The OpenVINO™ toolkit is a comprehensive toolkit that you can use to develop and deploy vision-oriented solutions on
-Intel® platforms. Vision-oriented means the solutions use images or videos to perform specific tasks.
-A few of the solutions use cases include autonomous navigation, digital surveillance cameras, robotics,
-and mixed-reality headsets.
-
-The OpenVINO™ toolkit:
-
-* Enables CNN-based deep learning inference on the edge
-* Supports heterogeneous execution across an Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2
-* Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels
-* Includes optimized calls for computer vision standards including OpenCV\*, OpenCL™, and OpenVX\*
-
-The OpenVINO™ toolkit includes the following components:
-
-* Intel® Deep Learning Deployment Toolkit (Intel® DLDT)
-    - [Deep Learning Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) — A cross-platform command-line tool for importing models and
-      preparing them for optimal execution with the Deep Learning Inference Engine. The Model Optimizer supports converting Caffe*,
-      TensorFlow*, MXNet*, Kaldi*, ONNX* models.
-    - [Deep Learning Inference Engine](inference_engine_intro.md) — A unified API to allow high performance inference on many hardware types
-      including Intel® CPU, Intel® Processor Graphics, Intel® FPGA, Intel® Neural Compute Stick 2.
-    - [nGraph](../nGraph_DG/nGraph_dg.md) — graph representation and manipulation engine which is used to represent a model inside Inference Engine and allows the run-time model construction without using Model Optimizer.
-* [OpenCV](https://docs.opencv.org/) — OpenCV* community version compiled for Intel® hardware.
-  Includes PVL libraries for computer vision.
-* Drivers and runtimes for OpenCL™ version 2.1
-* [Intel® Media SDK](https://software.intel.com/en-us/media-sdk)
-* [OpenVX*](https://software.intel.com/en-us/cvsdk-ovx-guide) — Intel's implementation of OpenVX*
-  optimized for running on Intel® hardware (CPU, GPU, IPU).
-* [Demos and samples](Samples_Overview.md).
-
-This Guide provides overview of the Inference Engine describing the typical workflow for performing
+This Guide provides an overview of the Inference Engine describing the typical workflow for performing
 inference of a pre-trained and optimized deep learning model and a set of sample applications.

-> **NOTES:**
-> - Before you perform inference with the Inference Engine, your models should be converted to the Inference Engine format using the Model Optimizer or built directly in run-time using nGraph API. To learn about how to use Model Optimizer, refer to the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To learn about the pre-trained and optimized models delivered with the OpenVINO™ toolkit, refer to [Pre-Trained Models](@ref omz_models_intel_index).
-> - [Intel® System Studio](https://software.intel.com/en-us/system-studio) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019).
+> **NOTE:** Before you perform inference with the Inference Engine, your models should be converted to the Inference Engine format using the Model Optimizer or built directly in run-time using nGraph API. To learn about how to use Model Optimizer, refer to the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To learn about the pre-trained and optimized models delivered with the OpenVINO™ toolkit, refer to [Pre-Trained Models](@ref omz_models_group_intel).

 After you have used the Model Optimizer to create an Intermediate Representation (IR), use the Inference Engine to infer the result for a given input data.

-## Table of Contents
-
-* [Inference Engine API Changes History](API_Changes.md)
-* [Introduction to Inference Engine](inference_engine_intro.md)
-* [Understanding Inference Engine Memory Primitives](Memory_primitives.md)
-* [Introduction to Inference Engine Device Query API](InferenceEngine_QueryAPI.md)
-* [Adding Your Own Layers to the Inference Engine](Extensibility_DG/Intro.md)
-* [Integrating Inference Engine in Your Application](Integrate_with_customer_application_new_API.md)
-* [[DEPRECATED] Migration from Inference Engine Plugin API to Core API](Migration_CoreAPI.md)
-* [Introduction to Performance Topics](Intro_to_Performance.md)
-* [Inference Engine Python API Overview](../../inference-engine/ie_bridges/python/docs/api_overview.md)
-* [Using Dynamic Batching feature](DynamicBatching.md)
-* [Using Static Shape Infer feature](ShapeInference.md)
-* [Using Low-Precision 8-bit Integer Inference](Int8Inference.md)
-* [Using Bfloat16 Inference](Bfloat16Inference.md)
-* Utilities to Validate Your Converted Model
-    * [Using Cross Check Tool for Per-Layer Comparison Between Plugins](../../inference-engine/tools/cross_check_tool/README.md)
-* [Supported Devices](supported_plugins/Supported_Devices.md)
-    * [GPU](supported_plugins/CL_DNN.md)
-    * [CPU](supported_plugins/CPU.md)
-    * [VPU](supported_plugins/VPU.md)
-    * [MYRIAD](supported_plugins/MYRIAD.md)
-    * [HDDL](supported_plugins/HDDL.md)
-    * [Heterogeneous execution](supported_plugins/HETERO.md)
-    * [GNA](supported_plugins/GNA.md)
-    * [MULTI](supported_plugins/MULTI.md)
-* [Pre-Trained Models](@ref omz_models_intel_index)
-* [Known Issues](Known_Issues_Limitations.md)
-
-**Typical Next Step:** [Introduction to Inference Engine](inference_engine_intro.md)
+Inference Engine is a set of C++ libraries providing a common API to deliver inference solutions on the platform of your choice: CPU, GPU, or VPU. Use the Inference Engine API to read the Intermediate Representation, set the input and output formats, and execute the model on devices. While the C++ libraries are the primary implementation, C libraries and Python bindings are also available.
+
+For Intel® Distribution of OpenVINO™ toolkit, Inference Engine binaries are delivered within release packages.
+
+The open source version is available in the [OpenVINO™ toolkit GitHub repository](https://github.com/openvinotoolkit/openvino) and can be built for supported platforms using the <a href="https://github.com/openvinotoolkit/openvino/wiki/BuildingCode">Inference Engine Build Instructions</a>.
+
+To learn about how to use the Inference Engine API for your application, see the [Integrating Inference Engine in Your Application](Integrate_with_customer_application_new_API.md) documentation.
+
+For complete API Reference, see the [Inference Engine API References](./api_references.html) section.
+
+Inference Engine uses a plugin architecture. Inference Engine plugin is a software component that contains complete implementation for inference on a certain Intel® hardware device: CPU, GPU, VPU, etc. Each plugin implements the unified API and provides additional hardware-specific APIs.
+
+## Modules in the Inference Engine component
+### Core Inference Engine Libraries ###
+
+Your application must link to the core Inference Engine libraries:
+* Linux* OS:
+    - `libinference_engine.so`, which depends on `libinference_engine_transformations.so`, `libtbb.so`, `libtbbmalloc.so` and `libngraph.so`
+* Windows* OS:
+    - `inference_engine.dll`, which depends on `inference_engine_transformations.dll`, `tbb.dll`, `tbbmalloc.dll` and `ngraph.dll`
+* macOS*:
+    - `libinference_engine.dylib`, which depends on `libinference_engine_transformations.dylib`, `libtbb.dylib`, `libtbbmalloc.dylib` and `libngraph.dylib`
+
+The required C++ header files are located in the `include` directory.
+
+This library contains the classes to:
+* Create Inference Engine Core object to work with devices and read network (InferenceEngine::Core)
+* Manipulate network information (InferenceEngine::CNNNetwork)
+* Execute and pass inputs and outputs (InferenceEngine::ExecutableNetwork and InferenceEngine::InferRequest)
+
+### Plugin Libraries to Read a Network Object ###
+
+Starting from 2020.4 release, Inference Engine introduced a concept of `CNNNetwork` reader plugins. Such plugins can be automatically dynamically loaded by Inference Engine in runtime depending on file format:
+* Linux* OS:
+    - `libinference_engine_ir_reader.so` to read a network from IR
+    - `libinference_engine_onnx_reader.so` to read a network from ONNX model format
+* Windows* OS:
+    - `inference_engine_ir_reader.dll` to read a network from IR
+    - `inference_engine_onnx_reader.dll` to read a network from ONNX model format
+
+### Device-Specific Plugin Libraries ###
+
+For each supported target device, Inference Engine provides a plugin — a DLL/shared library that contains complete implementation for inference on this particular device. The following plugins are available:
+
+| Plugin | Device Type |
+| ------- | ----------------------------- |
+|CPU | Intel® Xeon® with Intel® AVX2 and AVX512, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® SSE |
+|GPU | Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics |
+|MYRIAD | Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X |
+|GNA | Intel® Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel® Pentium® Silver J5005 Processor, Intel® Pentium® Silver N5000 Processor, Intel® Celeron® J4005 Processor, Intel® Celeron® J4105 Processor, Intel® Celeron® Processor N4100, Intel® Celeron® Processor N4000, Intel® Core™ i3-8121U Processor, Intel® Core™ i7-1065G7 Processor, Intel® Core™ i7-1060G7 Processor, Intel® Core™ i5-1035G4 Processor, Intel® Core™ i5-1035G7 Processor, Intel® Core™ i5-1035G1 Processor, Intel® Core™ i5-1030G7 Processor, Intel® Core™ i5-1030G4 Processor, Intel® Core™ i3-1005G1 Processor, Intel® Core™ i3-1000G1 Processor, Intel® Core™ i3-1000G4 Processor |
+|HETERO | Automatic splitting of a network inference between several devices (for example if a device doesn't support certain layers)|
+|MULTI | Simultaneous inference of the same network on several devices in parallel|
+
+The table below shows the plugin libraries and additional dependencies for Linux, Windows and macOS platforms.
+
+| Plugin | Library name for Linux | Dependency libraries for Linux | Library name for Windows | Dependency libraries for Windows | Library name for macOS | Dependency libraries for macOS |
+|--------|-----------------------------|-------------------------------------------------------------|--------------------------|--------------------------------------------------------------------------------------------------------|------------------------------|---------------------------------------------|
+| CPU | `libMKLDNNPlugin.so` | `libinference_engine_lp_transformations.so` | `MKLDNNPlugin.dll` | `inference_engine_lp_transformations.dll` | `libMKLDNNPlugin.so` | `inference_engine_lp_transformations.dylib` |
+| GPU | `libclDNNPlugin.so` | `libinference_engine_lp_transformations.so`, `libOpenCL.so` | `clDNNPlugin.dll` | `OpenCL.dll`, `inference_engine_lp_transformations.dll` | Is not supported | - |
+| MYRIAD | `libmyriadPlugin.so` | `libusb.so` | `myriadPlugin.dll` | `usb.dll` | `libmyriadPlugin.so` | `libusb.dylib` |
+| HDDL | `libHDDLPlugin.so` | `libbsl.so`, `libhddlapi.so`, `libmvnc-hddl.so` | `HDDLPlugin.dll` | `bsl.dll`, `hddlapi.dll`, `json-c.dll`, `libcrypto-1_1-x64.dll`, `libssl-1_1-x64.dll`, `mvnc-hddl.dll` | Is not supported | - |
+| GNA | `libGNAPlugin.so` | `libgna.so` | `GNAPlugin.dll` | `gna.dll` | Is not supported | - |
+| HETERO | `libHeteroPlugin.so` | Same as for selected plugins | `HeteroPlugin.dll` | Same as for selected plugins | `libHeteroPlugin.so` | Same as for selected plugins |
+| MULTI | `libMultiDevicePlugin.so` | Same as for selected plugins | `MultiDevicePlugin.dll` | Same as for selected plugins | `libMultiDevicePlugin.so` | Same as for selected plugins |
+
+> **NOTE**: All plugin libraries also depend on core Inference Engine libraries.
+
+Make sure those libraries are in your computer's path or in the place you pointed to in the plugin loader. Make sure each plugin's related dependencies are in the:
+
+* Linux: `LD_LIBRARY_PATH`
+* Windows: `PATH`
+* macOS: `DYLD_LIBRARY_PATH`
+
+On Linux and macOS, use the script `bin/setupvars.sh` to set the environment variables.
+
+On Windows, run the `bin\setupvars.bat` batch file to set the environment variables.
+
+To learn more about supported devices and corresponding plugins, see the [Supported Devices](supported_plugins/Supported_Devices.md) chapter.
+
+## Common Workflow for Using the Inference Engine API
+
+The common workflow contains the following steps:
+
+1. **Create Inference Engine Core object** - Create an `InferenceEngine::Core` object to work with different devices; all device plugins are managed internally by the `Core` object. Register extensions with custom nGraph operations (`InferenceEngine::Core::AddExtension`).
+
+2. **Read the Intermediate Representation** - Using the `InferenceEngine::Core` class, read an Intermediate Representation file into an object of the `InferenceEngine::CNNNetwork` class. This class represents the network in the host memory.
+
+3. **Prepare inputs and outputs format** - After loading the network, specify input and output precision and the layout on the network. For this specification, use the `InferenceEngine::CNNNetwork::getInputsInfo()` and `InferenceEngine::CNNNetwork::getOutputsInfo()`.
+
+4. Pass per device loading configurations specific to this device (`InferenceEngine::Core::SetConfig`), and register extensions to this device (`InferenceEngine::Core::AddExtension`).
+
+5. **Compile and Load Network to device** - Use the `InferenceEngine::Core::LoadNetwork()` method with specific device (e.g. `CPU`, `GPU`, etc.) to compile and load the network on the device. Pass in the per-target load configuration for this compilation and load operation.
+
+6. **Set input data** - With the network loaded, you have an `InferenceEngine::ExecutableNetwork` object. Use this object to create an `InferenceEngine::InferRequest` in which you signal the input buffers to use for input and output. Specify a device-allocated memory and copy it into the device memory directly, or tell the device to use your application memory to save a copy.
+
+7. **Execute** - With the input and output memory now defined, choose your execution mode:
+
+    * Synchronously - `InferenceEngine::InferRequest::Infer()` method. Blocks until inference is completed.
+    * Asynchronously - `InferenceEngine::InferRequest::StartAsync()` method. Check status with the `InferenceEngine::InferRequest::Wait()` method (0 timeout), wait, or specify a completion callback.
+
+8. **Get the output** - After inference is completed, get the output memory or read the memory you provided earlier. Do this with the `InferenceEngine::IInferRequest::GetBlob()` method.
+
+## Video: Inference Engine Concept
+[](https://www.youtube.com/watch?v=e6R13V8nbak)
+\htmlonly
+<iframe width="560" height="315" src="https://www.youtube.com/embed/e6R13V8nbak" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
+\endhtmlonly
+
+## Further Reading
+
+For more details on the Inference Engine API, refer to the [Integrating Inference Engine in Your Application](Integrate_with_customer_application_new_API.md) documentation.
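As a companion to the numbered workflow steps earlier in this guide, a condensed sketch of the same flow in C++; the model path, device, and input precision are placeholder assumptions, and error handling is omitted:

```cpp
#include <inference_engine.hpp>

int main() {
    // 1. Create the Core object; device plugins are managed internally.
    InferenceEngine::Core core;

    // 2. Read the Intermediate Representation into a CNNNetwork.
    InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml");

    // 3. Prepare the input format (precision/layout).
    auto inputsInfo = network.getInputsInfo();
    inputsInfo.begin()->second->setPrecision(InferenceEngine::Precision::U8);

    // 5. Compile and load the network to a device.
    InferenceEngine::ExecutableNetwork executable = core.LoadNetwork(network, "CPU");

    // 6. Create an infer request and access the input buffer.
    InferenceEngine::InferRequest request = executable.CreateInferRequest();
    InferenceEngine::Blob::Ptr input = request.GetBlob(inputsInfo.begin()->first);
    // ... fill the input blob with application data here ...

    // 7. Execute synchronously (StartAsync()/Wait() is the asynchronous path).
    request.Infer();

    // 8. Get the output.
    auto outputName = network.getOutputsInfo().begin()->first;
    InferenceEngine::Blob::Ptr output = request.GetBlob(outputName);
    return 0;
}
```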
@@ -1,24 +1,24 @@
 # Custom nGraph Operation {#openvino_docs_IE_DG_Extensibility_DG_AddingNGraphOps}

-Inference Engine Extension API allows to register operation sets (opsets) with custom nGraph operations, it allows to support Networks with unknown operations.
+Inference Engine Extension API enables you to register operation sets (opsets) with custom nGraph operations to support models with operations which OpenVINO™ does not support out-of-the-box.

 ## Operation Class

 To add your custom nGraph operation, create a new class that extends `ngraph::Op`, which is in turn derived from `ngraph::Node`, the base class for all graph operations in nGraph. Follow the steps below:

-1. Define a `NodeTypeInfo` object that identifies the type of the operation to the graph users and helps with dynamic type resolution. The type info of an nGraph operation currently consists of a string identifier and a version number, but this may change in the future.
+1. Add the `NGRAPH_RTTI_DECLARATION` and `NGRAPH_RTTI_DEFINITION` macros which define a `NodeTypeInfo` object that identifies the type of the operation to the graph users and helps with dynamic type resolution. The type info of an nGraph operation currently consists of a string identifier and a version number, but this may change in the future.

-2. Implement constructors that can optionally take the operation inputs and attributes as parameters.
+2. Implement constructors that optionally take the operation inputs and attributes as parameters.

-3. Override the shape inference method `validate_and_infer_types`. This method is called multiple times during graph manipulations to determine the shapes and element types of the outputs of the operations. You can access the input shapes through the `get_input_partial_shape()` method and input element types through the `get_input_element_type()` method of `ngraph::Node`. Set the inferred shape and element type of the output using `set_output_type`.
+3. Override the shape inference method `validate_and_infer_types`. This method is called multiple times during graph manipulations to determine the shapes and element types of the operations' outputs. To access the input shapes and input element types, use the `get_input_partial_shape()` and `get_input_element_type()` methods of `ngraph::Node`. Set the inferred shape and element type of the output using `set_output_type`.

-4. Override the `clone_with_new_inputs` method, which allows graph manipulation routines to create copies of this operation and connect it to different nodes during optimization.
+4. Override the `clone_with_new_inputs` method, which enables graph manipulation routines to create copies of this operation and connect it to different nodes during optimization.

-5. Override the `visit_attributes` method, which allows serialization and deserialization of attributes. An `AttributeVisitor` is passed to the method, and the implementation is expected to walk over all the attributes in the op using the type-aware `on_attribute` helper. Helpers are already implemented for standard C++ types like `int64_t`, `float`, `bool`, `vector` and for existing nGraph defined types.
+5. Override the `visit_attributes` method, which enables serialization and deserialization of operation attributes. An `AttributeVisitor` is passed to the method, and the implementation is expected to walk over all the attributes in the op using the type-aware `on_attribute` helper. Helpers are already implemented for standard C++ types like `int64_t`, `float`, `bool`, `vector`, and for existing nGraph defined types.

 6. Override `evaluate`, which is an optional method that enables the application of constant folding if there is a custom operation on the constant branch.

-Based on that, declaration of a operation class can look as follows:
+Based on that, declaration of an operation class can look as follows:

 @snippet template_extension/op.hpp op:header
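Since the @snippet reference above is not rendered in this view, here is a rough, header-level sketch of an operation class following these steps, with a single illustrative `int64_t` attribute; this is an assumption-laden illustration, not the actual template_extension code:

```cpp
#include <ngraph/op/op.hpp>

class Operation : public ngraph::op::Op {
public:
    // Step 1: declares the NodeTypeInfo object for dynamic type resolution
    // (paired with NGRAPH_RTTI_DEFINITION in the .cpp file).
    NGRAPH_RTTI_DECLARATION;

    // Step 2: default constructor plus a validating constructor.
    Operation() = default;
    Operation(const ngraph::Output<ngraph::Node>& arg, int64_t add);

    // Step 3: shape inference.
    void validate_and_infer_types() override;
    // Step 4: copying during graph transformations.
    std::shared_ptr<ngraph::Node>
    clone_with_new_inputs(const ngraph::OutputVector& new_args) const override;
    // Step 5: attribute (de)serialization.
    bool visit_attributes(ngraph::AttributeVisitor& visitor) override;

private:
    int64_t add_ = 0;  // illustrative operation attribute
};
```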
@@ -26,36 +26,38 @@ Based on that, declaration of an operation class can look as follows:

 The provided implementation has several fields:

-* `add` of type `int64_t` is an attribute of custom operation
-* `type_info` of type `ngraph::NodeTypeInfo` defines the type and version of operation
+* `add` of type `int64_t` is an attribute of a custom operation.
+* `type_info` of type `ngraph::NodeTypeInfo` defines the type and version of an operation.

 ### Operation Constructors

-nGraph operation contains two constructors: a default constructor, which allows to create operation without attributes and a constructor that creates and validates operation with specified inputs and attributes.
+nGraph operation contains two constructors:
+* Default constructor, which enables you to create an operation without attributes
+* Constructor that creates and validates an operation with specified inputs and attributes

 @snippet template_extension/op.cpp op:ctor

 ### `validate_and_infer_types()`

-`ngraph::Node::validate_and_infer_types` method validates operation attributes and calculates output shapes using attributes of operation.
+`ngraph::Node::validate_and_infer_types` method validates operation attributes and calculates output shapes using attributes of the operation.

 @snippet template_extension/op.cpp op:validate

 ### `clone_with_new_inputs()`

-`ngraph::Node::clone_with_new_inputs` method creates a copy of nGraph operation with new inputs.
+`ngraph::Node::clone_with_new_inputs` method creates a copy of the nGraph operation with new inputs.

 @snippet template_extension/op.cpp op:copy

 ### `visit_attributes()`

-`ngraph::Node::visit_attributes` method allows to visit all operation attributes.
+`ngraph::Node::visit_attributes` method enables you to visit all operation attributes.

 @snippet template_extension/op.cpp op:visit_attributes

 ### `evaluate()`

-`ngraph::Node::evaluate` method allows to apply constant folding to an operation.
+`ngraph::Node::evaluate` method enables you to apply constant folding to an operation.

 @snippet template_extension/op.cpp op:evaluate
@@ -67,7 +69,7 @@ To add custom operations to the [Extension](Extension.md) class, create an opera

 This method returns a map of opsets that exist in the extension library.

-nGraph provides opsets mechanism for operation versioning. Different opsets distinguish between different versions of one operation.
+nGraph provides an opset mechanism to group operations into clusters. Different opsets distinguish between different versions of one operation.

 When specifying opset names, follow the rules below:
 * Use unique opset names.
@@ -1,6 +1,6 @@
 # How to Implement Custom CPU Operations {#openvino_docs_IE_DG_Extensibility_DG_CPU_Kernel}

-The primary vehicle for the performance of the CPU codepath in the Inference Engine is the Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN), and new CPU kernels extend the Inference Engine plugin for the Intel MKL-DNN. Implementing the InferenceEngine::ILayerExecImpl defines a general CPU-side extension. There are no Intel MKL-DNN specifics in the way you need to implement a kernel.
+The primary means of the performance of the CPU codepath in the Inference Engine is the Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN), and new CPU kernels extend the Inference Engine plugin for the Intel MKL-DNN. Implementing the InferenceEngine::ILayerExecImpl defines a general CPU-side extension. There are no Intel MKL-DNN specifics in the way you need to implement a kernel.

 ## Implementation Class

@@ -13,20 +13,20 @@ Based on that, declaration of a kernel implementation class can look as follows:

 The provided implementation has several fields:

-* `add` of the type `int64_t` is an attribute of a custom operation
-* `inShape` of the type `ngraph::Shape` is an input shape
-* `outShape` of the type `ngraph::Shape` is an output shape
-* `error` of the type `std::string` is a field to handle errors from a constructor
+* `add` of the type `int64_t` is an attribute of a custom operation.
+* `inShape` of the type `ngraph::Shape` is an input shape.
+* `outShape` of the type `ngraph::Shape` is an output shape.
+* `error` of the type `std::string` is a field to handle errors from a constructor.

 ### Constructor of Implementation

-An implementation constructor checks parameters of nGraph operation, stores needed attributes, and stores an error message in the case of an error.
+An implementation constructor checks parameters of an nGraph operation, stores required attributes, and stores an error message in the case of an error.

 @snippet template_extension/cpu_kernel.cpp cpu_implementation:ctor

 ### `getSupportedConfigurations`

-InferenceEngine::ILayerExecImpl::getSupportedConfigurations method returns all supported configuration formats (input/output tensor layouts) for your implementation. To specify formats of data, use InferenceEngine::TensorDesc. Refer to the [Memory Primitives](../Memory_primitives.md) section for instructions on how to do it.
+InferenceEngine::ILayerExecImpl::getSupportedConfigurations method returns all supported configuration formats (input/output tensor layouts) for your implementation. To specify formats of data, use InferenceEngine::TensorDesc. Refer to the [Memory Primitives](../Memory_primitives.md) section for instructions.

 @snippet template_extension/cpu_kernel.cpp cpu_implementation:getSupportedConfigurations
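Since the cpu_kernel.cpp snippets are not rendered in this view, a declaration-level sketch of the implementation class this section describes; method bodies are omitted here, as the real implementations live in the referenced snippets:

```cpp
#include <ie_iextension.h>
#include <vector>

// Sketch of a general CPU-side extension implementing ILayerExecImpl.
class OpImplementation : public InferenceEngine::ILayerExecImpl {
public:
    // Report the supported input/output tensor layouts (TensorDesc-based).
    InferenceEngine::StatusCode getSupportedConfigurations(
        std::vector<InferenceEngine::LayerConfig>& conf,
        InferenceEngine::ResponseDesc* resp) noexcept override;

    // Accept the configuration the plugin selected.
    InferenceEngine::StatusCode init(
        InferenceEngine::LayerConfig& config,
        InferenceEngine::ResponseDesc* resp) noexcept override;

    // Run the kernel on the given input/output blobs.
    InferenceEngine::StatusCode execute(
        std::vector<InferenceEngine::Blob::Ptr>& inputs,
        std::vector<InferenceEngine::Blob::Ptr>& outputs,
        InferenceEngine::ResponseDesc* resp) noexcept override;
};
```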
@@ -1,17 +1,17 @@
-# Custom ONNX operators {#openvino_docs_IE_DG_Extensibility_DG_Custom_ONNX_Ops}
+# Custom ONNX* Operators {#openvino_docs_IE_DG_Extensibility_DG_Custom_ONNX_Ops}

-ONNX importer provides mechanism to register custom ONNX operators based on predefined or user-defined nGraph operations.
+The ONNX\* importer provides a mechanism to register custom ONNX operators based on predefined or custom nGraph operations.
 The function responsible for registering a new operator is called `ngraph::onnx_import::register_operator` and is defined in `onnx_import/onnx_utils.hpp`.

-## Registering custom ONNX operator based on predefined nGraph operations
+## Register Custom ONNX Operator Based on Predefined nGraph Operations

-The steps below explain how to register a custom ONNX operator, for example, CustomRelu, in a domain called com.example.
+The steps below explain how to register a custom ONNX operator, for example, CustomRelu, in a domain called `com.example`.
 CustomRelu is defined as follows:
 ```
 x >= 0 => f(x) = x * alpha
 x < 0  => f(x) = x * beta
 ```

-where alpha, beta are float constants.
+where `alpha` and `beta` are float constants.

 1. Include headers:
 @snippet onnx_custom_op/onnx_custom_op.cpp onnx_custom_op:headers

@@ -20,38 +20,40 @@ where `alpha` and `beta` are float constants.
 @snippet onnx_custom_op/onnx_custom_op.cpp onnx_custom_op:register_operator
 The `register_operator` function takes four arguments: op_type, opset version, domain, and a function object.
 The function object is a user-defined function that takes `ngraph::onnx_import::Node` as an input and based on that, returns a graph with nGraph operations.
-The `ngraph::onnx_import::Node` class represents a node in ONNX model. It provides functions to fetch input node(s) (`get_ng_inputs`), fetch attribute value (`get_attribute_value`) and many more (please refer to `onnx_import/core/node.hpp` for full class declaration).
-New operator registration must happen before the ONNX model is read, for example, if an ONNX model uses the 'CustomRelu' operator, `register_operator("CustomRelu", ...)` must be called before InferenceEngine::Core::ReadNetwork.
-Re-registering ONNX operators within the same process is supported. During registration of the existing operator, a warning is printed.
+The `ngraph::onnx_import::Node` class represents a node in an ONNX model. It provides functions to fetch input node(s) using `get_ng_inputs`, attribute value using `get_attribute_value`, and many more. See `onnx_import/core/node.hpp` for full class declaration.

-The example below demonstrates an exemplary model that requires previously created 'CustomRelu' operator:
+New operator registration must happen before an ONNX model is read. For example, if a model uses the `CustomRelu` operator, call `register_operator("CustomRelu", ...)` before InferenceEngine::Core::ReadNetwork.
+Reregistering ONNX operators within the same process is supported. If you register an existing operator, you get a warning.

+The example below demonstrates an exemplary model that requires a previously created `CustomRelu` operator:
 @snippet onnx_custom_op/onnx_custom_op.cpp onnx_custom_op:model

-For a reference on how to create a graph with nGraph operations, visit [Custom nGraph Operations](AddingNGraphOps.md).
-For a complete list of predefined nGraph operators, visit [available operations sets](../../ops/opset.md).
+To create a graph with nGraph operations, visit [Custom nGraph Operations](AddingNGraphOps.md).
+For a complete list of predefined nGraph operators, visit [Available Operations Sets](../../ops/opset.md).

-If operator is no longer needed, it can be unregistered by calling `unregister_operator`. The function takes three arguments `op_type`, `version`, and `domain`.
+If you do not need an operator anymore, unregister it by calling `unregister_operator`. The function takes three arguments: `op_type`, `version`, and `domain`.
 @snippet onnx_custom_op/onnx_custom_op.cpp onnx_custom_op:unregister_operator

-## Registering custom ONNX operator based on custom nGraph operations
+## Register Custom ONNX Operator Based on Custom nGraph Operations

-The same principles apply when registering custom ONNX operator based on custom nGraph operations.
-This example shows how to register custom ONNX operator based on `Operation` presented in [this tutorial](AddingNGraphOps.md), which is used in [TemplateExtension](Extension.md).
+The same principles apply when registering a custom ONNX operator based on custom nGraph operations.
+This example shows how to register a custom ONNX operator based on `Operation` presented in [this tutorial](AddingNGraphOps.md), which is used in [TemplateExtension](Extension.md).
 @snippet template_extension/extension.cpp extension:ctor

-Here, the `register_operator` function is called in Extension's constructor, which makes sure that it is called before InferenceEngine::Core::ReadNetwork (since InferenceEngine::Core::AddExtension must be called before a model with custom operator is read).
+Here, the `register_operator` function is called in the constructor of Extension. The constructor makes sure that the function is called before InferenceEngine::Core::ReadNetwork, because InferenceEngine::Core::AddExtension must be called before a model with a custom operator is read.

-The example below demonstrates how to unregister operator from Extension's destructor:
+The example below demonstrates how to unregister an operator from the destructor of Extension:
 @snippet template_extension/extension.cpp extension:dtor
-Note that it is mandatory to unregister custom ONNX operator if it is defined in dynamic shared library.
+> **NOTE**: It is mandatory to unregister a custom ONNX operator if it is defined in a dynamic shared library.

-## Requirements for building with CMake
+## Requirements for Building with CMake

-Program that uses the `register_operator` functionality, requires (in addition to Inference Engine) `ngraph` and `onnx_importer` libraries.
-The `onnx_importer` is a component of `ngraph` package , so `find_package(ngraph REQUIRED COMPONENTS onnx_importer)` is sufficient to find both.
-The `ngraph` package exposes two variables (`${NGRAPH_LIBRARIES}` and `${ONNX_IMPORTER_LIBRARIES}`), which reference `ngraph` and `onnx_importer` libraries.
+A program that uses the `register_operator` functionality requires `ngraph` and `onnx_importer` libraries in addition to the Inference Engine.
+The `onnx_importer` is a component of the `ngraph` package, so `find_package(ngraph REQUIRED COMPONENTS onnx_importer)` can find both.
+The `ngraph` package exposes two variables, `${NGRAPH_LIBRARIES}` and `${ONNX_IMPORTER_LIBRARIES}`, which reference the `ngraph` and `onnx_importer` libraries.
 Those variables need to be passed to the `target_link_libraries` command in the CMakeLists.txt file.

-See below CMakeLists.txt for reference:
+See CMakeLists.txt below for reference:
 @snippet onnx_custom_op/CMakeLists.txt cmake:onnx_custom_op
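To make the registration flow from this section concrete without the rendered snippets, a hedged sketch of registering the `CustomRelu` operator described above; the attribute names and defaults, and the particular nGraph decomposition, are assumptions of this sketch:

```cpp
#include <memory>
#include <onnx_import/onnx_utils.hpp>
#include <ngraph/opsets/opset5.hpp>

void register_custom_relu() {
    // Arguments: op_type, opset version, domain, and the function object
    // that builds an nGraph subgraph for the ONNX node.
    ngraph::onnx_import::register_operator(
        "CustomRelu", 1, "com.example",
        [](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector {
            auto x = node.get_ng_inputs().at(0);
            // Attribute defaults are placeholders for this sketch.
            const auto alpha = node.get_attribute_value<float>("alpha", 1.0f);
            const auto beta  = node.get_attribute_value<float>("beta", 0.0f);
            auto alpha_c = ngraph::opset5::Constant::create(x.get_element_type(), {}, {alpha});
            auto beta_c  = ngraph::opset5::Constant::create(x.get_element_type(), {}, {beta});
            // x >= 0 -> alpha * x; x < 0 -> beta * x, expressed as
            // alpha * relu(x) - beta * relu(-x).
            auto pos = std::make_shared<ngraph::opset5::Relu>(x);
            auto neg = std::make_shared<ngraph::opset5::Relu>(
                std::make_shared<ngraph::opset5::Negative>(x));
            auto out = std::make_shared<ngraph::opset5::Subtract>(
                std::make_shared<ngraph::opset5::Multiply>(pos, alpha_c),
                std::make_shared<ngraph::opset5::Multiply>(neg, beta_c));
            return {out};
        });
}
```

Call `register_custom_relu()` before InferenceEngine::Core::ReadNetwork, as the section above requires.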
@@ -1,29 +1,29 @@
 # Extension Library {#openvino_docs_IE_DG_Extensibility_DG_Extension}

 Inference Engine provides an InferenceEngine::IExtension interface, which defines the interface for Inference Engine Extension libraries.
-All extension libraries should be inherited from this interface. The example below contains implementation of two operations: `Template`
+Inherit all extension libraries from this interface. The example below contains an implementation of two operations: `Template`
 used as an example in this document and `FFT` used as a more complex example from the [Custom Operations Guide](../../HOWTO/Custom_Layers_Guide.md).

-> **NOTE**: `FFT` operation is implemented using OpenCV library functions `cv::dft` and `cv::idft`.
+> **NOTE**: `FFT` operation is implemented using the OpenCV library functions `cv::dft` and `cv::idft`.

-Based on that, declaration of an extension class can look as follows:
+Based on that, the declaration of an extension class can look as follows:

 @snippet template_extension/extension.hpp extension:header

-The extension library should contain and export the method InferenceEngine::CreateExtension, which creates an `Extension` class:
+The extension library should contain and export the InferenceEngine::CreateExtension method, which creates an `Extension` class:

 @snippet template_extension/extension.cpp extension:CreateExtension

 Also, an `Extension` object should implement the following methods:

-* InferenceEngine::IExtension::Release deletes an extension object
+* InferenceEngine::IExtension::Release deletes an extension object.

-* InferenceEngine::IExtension::GetVersion returns information about version of the library
+* InferenceEngine::IExtension::GetVersion returns information about the version of the library.

 @snippet template_extension/extension.cpp extension:GetVersion

 Implement the InferenceEngine::IExtension::getOpSets method if the extension contains custom layers.
-Read the [guide about custom operations](AddingNGraphOps.md) for more information.
+Read [Custom nGraph Operation](AddingNGraphOps.md) for more information.

-To understand how integrate execution kernels to the extension library, read the [guide about development of custom CPU kernels](CPU_Kernel.md).
-To understand how to register custom ONNX operator to the extension library, read the [guide about custom ONNX operators](Custom_ONNX_Ops.md).
+To integrate execution kernels to the extension library, read [How to Implement Custom CPU Operations](CPU_Kernel.md).
+To register a custom ONNX\* operator to the extension library, read [Custom ONNX Operators](Custom_ONNX_Ops.md).
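Again in lieu of the rendered snippets, a hedged sketch of a minimal `Extension` class implementing the methods listed above; the version values and description strings are placeholders:

```cpp
#include <ie_iextension.h>
#include <map>
#include <string>
#include <ngraph/opsets/opset.hpp>

class Extension : public InferenceEngine::IExtension {
public:
    // Report library version information.
    void GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept override {
        static const InferenceEngine::Version version = {{2, 1}, "placeholder-build", "template_extension"};
        versionInfo = &version;
    }
    void Unload() noexcept override {}
    // Delete the extension object.
    void Release() noexcept override { delete this; }
    // Return custom opsets when the extension defines custom layers.
    std::map<std::string, ngraph::OpSet> getOpSets() override { return {}; }
};
```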
@@ -1,15 +1,15 @@
|
||||
# How to Implement Custom GPU Operations {#openvino_docs_IE_DG_Extensibility_DG_GPU_Kernel}
|
||||
|
||||
The GPU codepath abstracts many details about OpenCL™. You need to provide the kernel code in OpenCL C and the configuration file that connects the kernel and its parameters to the parameters of the operation.
|
||||
The GPU codepath abstracts many details about OpenCL\*. You need to provide the kernel code in OpenCL C and the configuration file that connects the kernel and its parameters to the parameters of the operation.
|
||||
|
||||
There are two options of using custom operation configuration file:
|
||||
There are two options of using the custom operation configuration file:
|
||||
|
||||
* Include a section with your kernels into the global automatically-loaded `cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml` file, which is hosted in the `<INSTALL_DIR>/deployment_tools/inference_engine/bin/intel64/{Debug/Release}` folder
|
||||
* Call the `InferenceEngine::Core::SetConfig()` method from your application with the `InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE` key and the configuration file name as a value before loading the network that uses custom operations to the plugin:
|
||||
|
||||
@snippet snippets/GPU_Kernel.cpp part0
|
||||
|
||||
All Inference Engine samples, except trivial `hello_classification`,
|
||||
All Inference Engine samples, except the trivial `hello_classification`,
|
||||
feature a dedicated command-line option `-c` to load custom kernels. For example, to load custom operations for the classification sample, run the command below:
|
||||
```sh
|
||||
$ ./classification_sample -m <path_to_model>/bvlc_alexnet_fp16.xml -i ./validation_set/daily/227x227/apron.bmp -d GPU
|
||||
@@ -25,12 +25,12 @@ The definitions described in the sections below use the following notations:
|
||||
|
||||
Notation | Description
|
||||
---|---
|
||||
(0/1) | Can have 0 or 1 instances of this node/attribute
|
||||
(1) | Must have only 1 instance of this node/attribute
|
||||
(0+) | Can have any number of instances of this node/attribute
|
||||
(1+) | Can have 1 or more instances of this node/attribute
|
||||
(0/1) | Can have zero or one instance of this node or attribute
|
||||
(1) | Must have only one instance of this node or attribute
|
||||
(0+) | Can have any number of instances of this node or attribute
|
||||
(1+) | Can have one or more instances of this node or attribute
|
||||
|
||||
### CustomLayer Node and Sub-node Structure
|
||||
### CustomLayer Node and Sub-Node Structure
|
||||
|
||||
`CustomLayer` node contains the entire configuration for a single custom operation.
|
||||
|
||||
@@ -43,60 +43,60 @@ Notation | Description
|
||||
**Sub-nodes**: `Kernel` (1), `Buffers` (1), `CompilerOptions` (0+),
|
||||
`WorkSizes` (0/1)
|
||||
|
||||
### Kernel Node and Sub-node Structure
|
||||
### Kernel Node and Sub-Node Structure
|
||||
|
||||
`Kernel` node contains all kernel source code configuration. No kernel
|
||||
node structure exists.
|
||||
|
||||
**Sub-nodes**: `Source` (1+), `Define` (0+)
|
||||
|
||||
### Source Node and Sub-node Structure
|
||||
### Source Node and Sub-Node Structure
|
||||
|
||||
`Source` node points to a single OpenCL source file.
|
||||
|
||||
| Attribute Name | \# ||
|
||||
| Attribute Name | \# |Description|
|
||||
|-----|-----|-----|
|
||||
| `filename` | (1) | Name of the file containing OpenCL source code. Notice that path is relative to your executable. Multiple source nodes will have their sources concatenated in order. |
|
||||
| `filename` | (1) | Name of the file containing OpenCL source code. Note that the path is relative to your executable. Multiple source nodes will have their sources concatenated in order. |
|
||||
|
||||
**Sub-nodes**: None
|
||||
|
||||
### Define Node and Sub-node Structure
|
||||
### Define Node and Sub-Node Structure
|
||||
|
||||
`Define` node configures a single `#‍define` instruction to be added to
|
||||
the sources during compilation (JIT).
|
||||
|
||||
| Attribute Name | \# | Description |
|
||||
|------|-------|------|
|
||||
| `name` | (1) | The name of the defined JIT. For static constants, this can include the value as well (taken as a string). |
|
||||
| `name` | (1) | The name of the defined JIT. For static constants, this can include the value as well, which is taken as a string. |
|
||||
| `param` | (0/1) | This parameter value is used as the value of this JIT definition. |
|
||||
| `type` | (0/1) | The parameter type. Accepted values: `int`, `float`, and `int[]`, `float[]` for arrays. |
|
||||
| `default` | (0/1) | The default value to be used if the specified parameters is missing from the operation in the IR. |
|
||||
| `default` | (0/1) | The default value to be used if the specified parameters are missing from the operation in the IR. |
|
||||
|
||||
**Sub-nodes:** None
|
||||
|
||||
The resulting JIT has the following form:
|
||||
`#‍define [name] [type] [value/default]`.
|
||||
|
||||
### Buffers Node and Sub-node Structure
|
||||
### Buffers Node and Sub-Node Structure
|
||||
|
||||
`Buffers` node configures all input/output buffers for the OpenCL entry
|
||||
function. No buffers node structure exists.
|
||||
|
||||
**Sub-nodes:** `Data` (0+), `Tensor` (1+)
|
||||
|
||||
### Data Node and Sub-node Structure
|
||||
### Data Node and Sub-Node Structure

`Data` node configures a single input with static data, for example,
weights or biases.

| Attribute Name | \# | Description |
|----|-----|------|
| `name` | (1) | Name of a blob attached to an operation in the IR |
| `arg-index` | (1) | 0-based index in the entry function arguments to be bound to |

**Sub-nodes**: None
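
For instance, a `Data` sub-node binding a blob named `weights` to the third kernel argument could look like the following sketch (the names are illustrative):

```xml
<!-- Hypothetical example: bind the "weights" blob to argument index 2 -->
<Data name="weights" arg-index="2"/>
```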

### Tensor Node and Sub-Node Structure

`Tensor` node configures a single input or output tensor.

@@ -105,9 +105,9 @@ weights or biases.
| `arg-index` | (1) | 0-based index in the entry function arguments to be bound to. |
| `type` | (1) | `input` or `output` |
| `port-index` | (1) | 0-based index in the operation input/output ports in the IR |
| `format` | (0/1) | Data layout declaration for the tensor. Accepted values: `BFYX`, `BYXF`, `YXFB`, `FYXB`, and the same values in all lowercase. Default value: `BFYX` |
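
For example, a sketch of a `Tensor` sub-node that binds the operation's first input port to the first kernel argument in the default layout:

```xml
<!-- Illustrative only: maps input port 0 to kernel argument 0 -->
<Tensor type="input" port-index="0" arg-index="0" format="BFYX"/>
```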

### CompilerOptions Node and Sub-Node Structure

`CompilerOptions` node configures the compilation flags for the OpenCL
sources.

@@ -118,22 +118,22 @@ sources.

**Sub-nodes**: None

### WorkSizes Node and Sub-Node Structure

`WorkSizes` node configures the global/local work sizes to be used when
queuing an OpenCL program for execution.

| Attribute Name | \# | Description |
|-----|------|-----|
| `global`<br>`local` | (0/1)<br>(0/1) | An array of up to three integers or formulas for defining the OpenCL work-sizes to be used during execution.<br> The formulas can use the values of the B,F,Y,X dimensions and contain the operators: +,-,/,\*,%. All operators are evaluated in integer arithmetic. <br>Default value: `global="B*F*Y*X" local=""` |
| `dim` | (0/1) | A tensor to take the work-size from. Accepted values: `input N`, `output`, where `N` is an index of input tensor starting with 0. Default value: `output` |

**Sub-nodes**: None
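
As a sketch, a `WorkSizes` node that launches one work-item per output element, with one row per work group, might look as follows (the formulas are illustrative):

```xml
<!-- Illustrative only: sizes are derived from the output tensor dimensions -->
<WorkSizes dim="output" global="B*F*Y*X" local="X"/>
```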

## Example Configuration File

The following code sample provides an example configuration file in the
`.xml` format. For information on the configuration file structure, see
[Configuration File Format](#config-file-format).
```xml
<CustomLayer name="ReLU" type="SimpleGPU" version="1">
@@ -150,10 +150,10 @@ The following code sample provides an example configuration file (in the
</CustomLayer>
```

## Built-In Definitions for Custom Layers

The following table includes definitions that are attached before
user sources, where `<TENSOR>` is the actual input and output, for
example, `INPUT0` or `OUTPUT0`.

For an example, see [Example Kernel](#example-kernel).

@@ -175,10 +175,10 @@ For an example, see [Example Kernel](#example-kernel).
| `<TENSOR>_UPPER_PADDING_SIZE` | The size of the `<TENSOR>_UPPER_PADDING` array |
| `<TENSOR>_PITCHES` | The number of elements between adjacent elements in each dimension. Always ordered as BFYX.|
| `<TENSOR>_PITCHES_SIZE`| The size of the `<TENSOR>_PITCHES` array |
| `<TENSOR>_OFFSET`| The number of elements from the start of the tensor to the first valid element, bypassing the lower padding. |

All `<TENSOR>` values are automatically defined for every tensor
bound to this operation, such as `INPUT0`, `INPUT1`, and `OUTPUT0`, as shown
in the following example:

```sh
#define INPUT0_DIMS_SIZE 4
@@ -208,7 +208,7 @@ __kernel void example_relu_kernel(
}
```

> **NOTE:** As described in the previous section, all definitions such as
> `INPUT0_TYPE` are actually defined as OpenCL (pre-)compiler inputs by
> the Inference Engine for efficiency reasons. See [Debugging
> Tips](#debugging-tips) for information on debugging the results.

@@ -1,25 +1,25 @@
# Inference Engine Extensibility Mechanism {#openvino_docs_IE_DG_Extensibility_DG_Intro}

Inference Engine Extensibility API enables you to add support for custom operations to the Inference Engine.
An extension should contain operation sets with custom operations and execution kernels for custom operations.
Physically, an extension library can be represented as a dynamic library exporting the single `CreateExtension` function
that creates a new extension instance.

To load the Extensibility library to the `InferenceEngine::Core` object, use the
`InferenceEngine::Core::AddExtension` method.
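
A minimal sketch of loading such a library, assuming it was built as `libtemplate_extension.so` (the path is illustrative):

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core core;
    // Load the custom operation sets and kernels from the extension library
    core.AddExtension(std::make_shared<InferenceEngine::Extension>("libtemplate_extension.so"));
    return 0;
}
```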

## Inference Engine Extension Library

Inference Engine Extension dynamic library contains the following components:

* [Extension Library](Extension.md):
    - Contains custom operation sets.
    - Provides CPU implementations for custom operations.
* [Custom nGraph Operation](AddingNGraphOps.md):
    - Enables the use of `InferenceEngine::Core::ReadNetwork` to read Intermediate Representation (IR) with unsupported operations.
    - Enables the creation of `ngraph::Function` with unsupported operations.
    - Provides a shape inference mechanism for custom operations.

> **NOTE**: This documentation is written based on the `Template extension`, which demonstrates extension
development details. Find the complete code of the `Template extension`, which is fully compilable and up-to-date,

@@ -43,10 +43,8 @@ The following pages describe how to integrate custom _kernels_ into the Inferenc
* [Introduction to development of custom GPU kernels](GPU_Kernel.md)
* [Introduction to development of custom VPU kernels](VPU_Kernel.md)

## See Also

* [Build an extension library using CMake*](Building.md)
* [Using Inference Engine Samples](../Samples_Overview.md)
* [Hello Shape Infer SSD sample](../../../inference-engine/samples/hello_reshape_ssd/README.md)

@@ -1,24 +1,24 @@
# How to Implement Custom Layers for VPU (Intel® Neural Compute Stick 2) {#openvino_docs_IE_DG_Extensibility_DG_VPU_Kernel}

> **NOTES:**
> * OpenCL\* custom layer support is available in the preview mode.
> * This section assumes you are familiar with developing kernels using OpenCL.

To customize your topology with an OpenCL layer, follow the steps below:
1. Write and compile your OpenCL code with the standalone offline OpenCL compiler (`clc`).
2. Write a configuration file to bind the OpenCL kernel to the topology file (`.xml`) of the model IR.
3. Pass the configuration file to the Inference Engine with the model IR.

## Compile OpenCL code for VPU (Intel® Neural Compute Stick 2)

> **NOTE:** The OpenCL compiler, targeting Intel® Neural Compute Stick 2 for the SHAVE* processor only, is redistributed with OpenVINO.
OpenCL support is provided by ComputeAorta*, and is distributed under a license agreement between Intel® and Codeplay* Software Ltd.

The OpenCL toolchain for the Intel® Neural Compute Stick 2 supports offline compilation only, so first compile OpenCL C code using the standalone `clc` compiler. You can find the compiler binary at `<INSTALL_DIR>/deployment_tools/tools/cl_compiler`.

> **NOTE:** By design, custom OpenCL layers support any OpenCL kernels written assuming OpenCL version 1.2. The toolchain also supports the half-float
extension and is optimized for this type, because it is a native type for Intel® Movidius™ VPUs.

1. Prior to running a compilation, make sure that the following variables are set:
   * `SHAVE_MA2X8XLIBS_DIR=<INSTALL_DIR>/deployment_tools/tools/cl_compiler/lib/`

@@ -57,25 +57,25 @@ Configuration file for this kernel might be the following:
```
Each custom layer is described with the `CustomLayer` node. It has the following nodes and attributes:
- Root node `CustomLayer` contains the following attributes:
  - `name` – (Required) The name of the Inference Engine layer to bind the kernel with.
  - `type` and `version` – (Required) Reserved for future use. Set them to `MVCL` and `1` respectively.
  - `max-shaves` – (Optional) The maximum number of SHAVE cores that should be dedicated for the layer. It is useful for debugging concurrency issues or for saving resources if a memory-bound kernel does not scale well with the number of cores, so more resources can be left for the rest of a topology.
- Sub-node `Kernel` must contain the following attributes:
  - `entry` – The name of your kernel function as you defined it in a source file. In the example above, it is `reorg_nhwc`.
- Node `Source` must contain the following attributes:
  - `filename` – The path to a compiled binary relative to the `.xml` binding file.
- Sub-node `Parameters` – Describes parameter bindings. For more information, see the description below.
- Sub-node `WorkSizes` – Describes local and global work group sizes and the source for dimension deduction as a pair `direction,port`. In the example above, the work group is described relatively to the dimension of the input tensor that comes through port 0 in the IR. `global` and `local` work group configurations support any simple math expressions with +,-,\*,/, and () from `B`(batch), `Y`(height), `X`(width) and `F`(channels).
- Sub-node `Where` – Allows you to customize bindings with the `key="value"` attribute. For example, to substitute only 3x3 convolutions, write `<Where kernel="3,3"/>` in the binding xml.

Parameter description supports `Tensor` of one of tensor types such as `input`, `output`, `input_buffer`, `output_buffer` or `data`, `Scalar`, or `Data` nodes and has the following format:
- Each `Tensor` node of `input` or `output` type must contain the following attributes:
  - `arg-name` – The name of a kernel parameter in the kernel signature.
  - `type` – Node type: `input` or `output` as in the IR.
  - `port-index` – A number of input/output ports as in the IR.
  - `format` – The channel order in the tensor. Optional conversion layers are generated if the custom layer format is not compatible with formats of neighboring layers. `BFXY`, `BYXF`, and `ANY` formats are supported currently.
- Each `Tensor` node of `input_buffer` or `output_buffer` type must contain the following attributes:
  - `arg-name` – The name of a kernel parameter in the kernel signature.
  - `type` – Node type: `input_buffer` or `output_buffer`. Use the appropriate type to bind multiple kernels that correspond to different stages of the same layer.
  - `port-index` – The unique identifier to bind by.
  - `dim` – The dim source with the same `direction,port` format used for `WorkSizes` bindings.

@@ -110,7 +110,7 @@ Each custom layer is described with the `CustomLayer` node. It has the following
</CustomLayer>
```
- Each `Tensor` node that has the type `data` must contain the following attributes:
  - `source` – A name of the blob as it is in the IR. A typical example is `weights` for convolution.
  - `format` – Specifies the channel order in the tensor. Optional conversion layers are generated if the custom layer format is not compatible with the formats of neighboring layers.
```xml
<CustomLayer name="BinaryConvolution" type="MVCL" version="1">
@@ -127,15 +127,15 @@ Each custom layer is described with the `CustomLayer` node. It has the following
</CustomLayer>
```
- Each `Scalar` node must contain the following attributes:
  - `arg-name` – The name of a kernel parameter in the kernel signature.
  - `type` – `int` or `float` value. It is used for correct argument extraction from IR parameters.
  - `source` – Contains the name of the parameter in the IR file or input/output (`I`/`O`, `In`/`On`, where `n` is a port number)
followed by dimension `B`(batch), `Y`(height), `X`(width), or `F`(channels).
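
As an illustration, a `Scalar` node that passes a hypothetical `coeff` parameter from the IR to the kernel might look like:

```xml
<!-- Illustrative only: "scale" and "coeff" are hypothetical names -->
<Scalar arg-name="scale" type="float" source="coeff"/>
```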

- Each `Data` node must contain the following attributes:
  - `arg-name` – The name of a kernel parameter in the kernel signature.
  - `type` – Node type. Currently, `local_data` is the only supported value, which defines a buffer allocated in fast local on-chip memory. It is limited to 100KB for all `__local` and
`__private` arrays defined inside the kernel as well as all `__local` parameters passed to the kernel. Note that a manual-DMA extension requires double buffering.
If the custom layer is detected to run out of local memory, the inference fails.
  - `dim` – The dim source with the same `direction,port` format used for `WorkSizes` bindings.
  - `size` – The amount of bytes needed. The current expression syntax supports only expressions over dimensions of the selected input/output tensor or constants and may be extended in the future.
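
A sketch of a `Data` node that allocates a local scratch buffer sized from the output tensor (the names and the size expression are illustrative):

```xml
<!-- Illustrative only: one row of halves (2 bytes each) in local memory -->
<Data arg-name="tmp_line" type="local_data" dim="output,0" size="X*2"/>
```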

@@ -177,7 +177,7 @@ config["VPU_CUSTOM_LAYERS"] = "/path/to/your/customLayers.xml";
auto exeNetwork = core.LoadNetwork(cnnNetwork, "MYRIAD", networkConfig);
```

## Optimizing Kernels with OpenCL for VPU (Intel® Neural Compute Stick 2)

This section provides optimization guidelines on writing custom layers with OpenCL for VPU devices. Knowledge of the general OpenCL
programming model and the OpenCL kernel language is assumed and is not the subject of this section. The OpenCL model mapping to VPU is described in the table below.

@@ -196,9 +196,9 @@ work grid among available compute resources and executes them in an arbitrary or

1. Split work evenly across work groups.
2. Adjust work group granularity to maintain equal workload for all compute cores.
3. Set the maximum number of cores using the `max-shaves` attribute for the `CustomLayer` node. This keeps more resources for the rest of the topology. It is also useful if the kernel scalability has reached its limits, which may happen while optimizing memory-bound kernels or kernels with poor parallelization.
4. Try an alternate data layout (`BFXY`/`BYXF`) for the kernel if it improves work group partitioning or data access patterns.
Consider not just a specific layer boost, but full topology performance, because data conversion layers would be automatically inserted
as appropriate.

Offline OpenCL compiler (`clc`) features automatic vectorization over `get_global_id(0)` usage, if uniform access is detected.

@@ -218,11 +218,11 @@ patterns. WGV works if and only if vector types are not used in the code.
Here is a short list of optimization tips:

1. Help the auto-vectorizer ensure non-aliasing pointers for kernel parameters by putting `restrict` where possible.
   - This can give a performance boost, especially for kernels with unrolling, like `ocl_grn` from the example below.
   - Place `restrict` markers for kernels with manually vectorized code. In the `ocl_grn` kernel below, the unrolled version without `restrict` is up to 20% slower than the most optimal one, which combines unrolling and `restrict`.
2. Put `#pragma unroll N` to your loop header. The compiler does not trigger unrolling by default, so it is your responsibility to
annotate the code with pragmas as appropriate. The `ocl_grn` version with `#pragma unroll 4` is up to 50% faster, most of which comes from unrolling the first loop, because LLVM, in general, is better at scheduling 3-stage loops (load-compute-store), while the first loop
`variance += (float)(src_data[c*H*W + y*W + x] * src_data[c*H*W + y*W + x]);` is only 2-stage (load-compute). Pay
attention to unrolling such cases first. The unrolling factor is loop-dependent. Choose the smallest number that
still improves performance as an optimum between the kernel size and execution speed. For this specific kernel, changing the unroll factor from `4` to `6` results in the same performance, so an unrolling factor of 4 is the optimum. For Intel® Neural Compute Stick 2, unrolling is conjugated with the automatic software pipelining for load, store, and compute stages:
```cpp
@@ -294,15 +294,15 @@ __kernel void ocl_grn_line(__global const half* restrict src_data, __global hal
```
Both versions perform the same, but the second one has more complex code.

3. If it is easy to predict the work group size, you can also use the `reqd_work_group_size` kernel attribute to ask the compiler
to unroll the code up to the local size of the work group, as shown in the sketch below. Note that if the kernel is actually executed with a
different work group configuration, the result is undefined.
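
A minimal sketch (not from the guide) of fixing the work group size with this attribute; it assumes the half type support that is native on these VPUs:

```cpp
// Hypothetical kernel: the compiler may unroll across the fixed local size of 8
__attribute__((reqd_work_group_size(8, 1, 1)))
__kernel void scale_row(__global const half* restrict src,
                        __global half* restrict dst,
                        half scale)
{
    const int i = get_global_id(0);
    dst[i] = src[i] * scale;  // undefined if launched with a different local size
}
```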

4. Prefer to use the `half` compute if it keeps reasonable accuracy. 16-bit float is a native type for Intel® Neural Compute Stick 2; most of the `half_*` functions are mapped to a single hardware instruction.
Use the standard `native_*` functions for the rest of the types.

5. Prefer to use the `convert_half` function over `vstore_half` if conversion to 32-bit float is required. `convert_half` is mapped to a single hardware instruction. For the `cvtf32f16` kernel above, the line `outImage[idx] = convert_half(inImage[idx]*scale+bais);` is eight times slower than the code with `vstore_half`.

6. Mind early exits. An early exit can be extremely costly for the current version of the `clc` compiler due to conflicts with the
auto-vectorizer. The generic advice is to set up the local size by the `x` dimension equal to the input and/or output width.
If it is impossible to define a work grid that exactly matches inputs and/or outputs to eliminate checks, for example,
`if (get_global_id(0) >= width) return`, use a line-wise kernel variant with manual vectorization.

@@ -333,9 +333,9 @@ The kernel example below demonstrates the impact of early exits on kernel perfor
    out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
}
```
This `reorg` kernel is auto-vectorizable, but an input for the YOLO v2 topology is `NCHW=<1,64,26,26>` and it is not a multiple of the vector width, which is `8` for the `half` data type. As a result, the Inference Engine does not select the auto-vectorized kernel.
To compare performance of the auto-vectorized and scalar versions of the kernel, change the input size to `NCHW=<1,64,26,32>`. This enables the auto-vectorized version to be selected by the Inference Engine and can give you about a 30% uplift.
Since the auto-vectorized version is faster, it makes sense to enable it for the YOLO v2 topology input size by setting the local size to a multiple of the vector width, for example, 32, and adjusting the global sizes accordingly. As a result, the execution work grid exceeds the actual input dimension, so out-of-bound checks should be inserted. See the updated kernel version below:
```cpp
// Version with out-of-bound checks added
__kernel void reorg(const __global half* restrict src, __global half* restrict out, int W, int stride)
@@ -445,7 +445,7 @@ from/to a `__blobal` pointer since work-group copying could be done in a vector
    }
}
```
This kernel can be rewritten to introduce the special data binding intrinsics `__dma_preload` and `__dma_postwrite`. This means that instead of one kernel, a group of three kernels should be implemented: `kernelName`, `__dma_preload_kernelName`, and `__dma_postwrite_kernelName`. `__dma_preload_kernelName` for a particular work group `n` is guaranteed to be executed before the `n`-th work group itself, while `__dma_postwrite_kernelName` is guaranteed to be executed after a corresponding work group. You can define either of these functions, which are intended to copy data to and from `__global` and `__local` memory. The syntax requires an exact function signature match. The example below illustrates how to prepare your kernel for manual-DMA.
```cpp
__kernel void __dma_preload_grn_NCHW(
    __global const half* restrict src,
@@ -612,7 +612,7 @@ __kernel void grn_NCHW(
}
```

Note the `get_local_size` and `get_local_id` usage inside the kernel. A 21x speedup is expected for a kernel on the enet-curbs setup because it was completely limited by memory usage.

An alternative method of using DMA is to use the work item copy extension. Those functions are executed inside a kernel and require work groups equal to a single work item.

@@ -2,61 +2,18 @@

## Disclaimer

Low-precision 8-bit inference is optimized for:
- Intel® architecture processors with the following instruction set architecture extensions:
  - Intel® Advanced Vector Extensions 512 Vector Neural Network Instructions (Intel® AVX-512 VNNI)
  - Intel® Advanced Vector Extensions 512 (Intel® AVX-512)
  - Intel® Advanced Vector Extensions 2.0 (Intel® AVX2)
  - Intel® Streaming SIMD Extensions 4.2 (Intel® SSE4.2)
- Intel® processor graphics:
  - Intel® Iris® Xe Graphics
  - Intel® Iris® Xe MAX Graphics
- A model must be quantized. You can use a quantized model from [OpenVINO™ Toolkit Intel's Pre-Trained Models](@ref omz_models_group_intel) or quantize a model yourself. For quantization, you can use the:
  - [Post-Training Optimization Tool](@ref pot_README) delivered with the Intel® Distribution of OpenVINO™ toolkit release package.
  - [Neural Network Compression Framework](https://www.intel.com/content/www/us/en/artificial-intelligence/posts/openvino-nncf.html) available on GitHub: https://github.com/openvinotoolkit/nncf

## Introduction

@@ -65,63 +22,62 @@ A lot of investigation was made in the field of deep learning with the idea of u

8-bit computations (referred to as `int8`) offer better performance compared to the results of inference in higher precision (for example, `fp32`), because they allow loading more data into a single processor instruction. Usually the cost for the significant boost is reduced accuracy. However, it has been proven that the accuracy drop can be negligible and depends on task requirements, so the application engineer can set up the maximum accuracy drop that is acceptable.

Let's explore the quantized [TensorFlow* implementation of ResNet-50](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) model. Use the [Model Downloader](@ref omz_tools_downloader) tool to download the `fp16` model from the [OpenVINO™ Toolkit - Open Model Zoo repository](https://github.com/openvinotoolkit/open_model_zoo):
```sh
./downloader.py --name resnet-50-tf --precisions FP16-INT8
```
After that, you should quantize the model with the [Model Quantizer](@ref omz_tools_downloader) tool.
```sh
./quantizer.py --model_dir public/resnet-50-tf --dataset_dir <DATASET_DIR> --precisions=FP16-INT8
```
The simplest way to infer the model and collect performance counters is the [C++ Benchmark Application](../../inference-engine/samples/benchmark_app/README.md).
```sh
./benchmark_app -m resnet-50-tf.xml -d CPU -niter 1 -api sync -report_type average_counters -report_folder pc_report_dir
```
If you infer the model with the OpenVINO™ CPU plugin and collect performance counters, all operations (except the last, non-quantized SoftMax) are executed in INT8 precision.

## Low-Precision 8-bit Integer Inference Workflow

For 8-bit integer computations, a model must be quantized. Quantized models can be downloaded from [Overview of OpenVINO™ Toolkit Intel's Pre-Trained Models](@ref omz_models_group_intel). If the model is not quantized, you can use the [Post-Training Optimization Tool](@ref pot_README) to quantize the model. The quantization process adds [FakeQuantize](../ops/quantization/FakeQuantize_1.md) layers on activations and weights for most layers. Read more about the mathematical computations in [Uniform Quantization with Fine-Tuning](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md).

8-bit inference pipeline includes two stages (also refer to the figure below):
1. *Offline stage*, or *model quantization*. During this stage, [FakeQuantize](../ops/quantization/FakeQuantize_1.md) layers are added before most layers to have quantized tensors before layers in a way that the low-precision accuracy drop for 8-bit integer inference satisfies the specified threshold. The output of this stage is a quantized model. The quantized model precision is not changed, quantized tensors are in the original precision range (`fp32`). The `FakeQuantize` layer has a `levels` attribute which defines the quants count. The quants count defines the precision which is used during inference. For the `int8` range, the `levels` attribute value has to be 255 or 256. To quantize the model, you can use the [Post-Training Optimization Tool](@ref pot_README) delivered with the Intel® Distribution of OpenVINO™ toolkit release package.

   When you pass the quantized IR to the OpenVINO™ plugin, the plugin automatically recognizes it as a quantized model and performs 8-bit inference. Note that if you pass a quantized model to another plugin that does not support 8-bit inference but supports all operations from the model, the model is inferred in a precision that this plugin supports.

2. *Run-time stage*. This stage is an internal procedure of the [CPU Plugin](supported_plugins/CPU.md). During this stage, the quantized model is loaded to the plugin. The plugin updates each `FakeQuantize` layer on activations and weights to have `FakeQuantize` output tensor values in the low precision range.

![int8_flow]
## Performance Counters

Information about layer precision is stored in the performance counters that are
available from the Inference Engine API. For example, part of the performance counters table for the quantized [TensorFlow* implementation of ResNet-50](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) model inference on [CPU Plugin](supported_plugins/CPU.md) looks as follows:

| layerName | execStatus | layerType | execType | realTime (ms) | cpuTime (ms) |
| --------------------------------------------------------- | ---------- | ------------ | -------------------- | ------------- | ------------ |
| resnet\_model/batch\_normalization\_15/FusedBatchNorm/Add | EXECUTED | Convolution | jit\_avx512\_1x1\_I8 | 0.377 | 0.377 |
| resnet\_model/conv2d\_16/Conv2D/fq\_input\_0 | NOT\_RUN | FakeQuantize | undef | 0 | 0 |
| resnet\_model/batch\_normalization\_16/FusedBatchNorm/Add | EXECUTED | Convolution | jit\_avx512\_I8 | 0.499 | 0.499 |
| resnet\_model/conv2d\_17/Conv2D/fq\_input\_0 | NOT\_RUN | FakeQuantize | undef | 0 | 0 |
| resnet\_model/batch\_normalization\_17/FusedBatchNorm/Add | EXECUTED | Convolution | jit\_avx512\_1x1\_I8 | 0.399 | 0.399 |
| resnet\_model/add\_4/fq\_input\_0 | NOT\_RUN | FakeQuantize | undef | 0 | 0 |
| resnet\_model/add\_4 | NOT\_RUN | Eltwise | undef | 0 | 0 |
| resnet\_model/add\_5/fq\_input\_1 | NOT\_RUN | FakeQuantize | undef | 0 | 0 |

> The `execStatus` column of the table includes the following possible values:
> - `EXECUTED` - the layer was executed by a standalone primitive,
> - `NOT_RUN` - the layer was not executed by a standalone primitive or was fused with another operation and executed in another layer primitive.
>
> The `execType` column of the table includes inference primitives with specific suffixes. The layers have the following marks:
> * Suffix `I8` for layers that had 8-bit data type input and were computed in 8-bit precision
> * Suffix `FP32` for layers computed in 32-bit precision

All `Convolution` layers are executed in int8 precision. The rest of the layers are fused into Convolutions using the post-operations optimization technique described in [Internal CPU Plugin Optimizations](supported_plugins/CPU.md).
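
A minimal sketch (not part of the guide) of retrieving these counters through the Inference Engine C++ API, assuming an `InferRequest` that has already run:

```cpp
#include <inference_engine.hpp>
#include <iostream>

void print_counters(InferenceEngine::InferRequest& request) {
    // Map of layer name to profiling info, including the execType suffix (I8/FP32)
    auto counters = request.GetPerformanceCounts();
    for (const auto& item : counters) {
        const auto& info = item.second;
        std::cout << item.first << " " << info.exec_type << " "
                  << info.realTime_uSec << " us" << std::endl;
    }
}
```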

[int8_flow]: img/cpu_int8_flow.png

@@ -29,7 +29,7 @@ Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README
## Using Async API
To gain better performance on accelerators, such as VPU, the Inference Engine uses the asynchronous approach (see
[Integrating Inference Engine in Your Application (current API)](Integrate_with_customer_application_new_API.md)).
The point is amortizing the costs of data transfers by pipelining; see [Async API explained](@ref omz_demos_object_detection_demo_cpp).
Since pipelining relies on the availability of parallel slack, running multiple inference requests in parallel is essential, as sketched below.
Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample, which enables running a number of inference requests in parallel. Specifying different numbers of requests produces different throughput measurements.
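
A bare-bones sketch (assuming an `exeNetwork` loaded beforehand) of running two requests in flight:

```cpp
// Two infer requests started asynchronously; inputs are assumed to be set
auto request0 = exeNetwork.CreateInferRequest();
auto request1 = exeNetwork.CreateInferRequest();
request0.StartAsync();
request1.StartAsync();
// Wait for both results; other useful work could be done in between
request0.Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY);
request1.Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY);
```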

@@ -21,7 +21,7 @@ Inference Engine sample applications include the following:
- **Hello Query Device Sample** – Query of available Inference Engine devices and their metrics, configuration values.
   - [Hello Query Device C++ Sample](../../inference-engine/samples/hello_query_device/README.md)
   - [Hello Query Device Python* Sample](../../inference-engine/ie_bridges/python/sample/hello_query_device/README.md)
- **[Hello Reshape SSD C++ Sample](../../inference-engine/samples/hello_reshape_ssd/README.md)** – Inference of SSD networks resized by ShapeInfer API according to an input size.
- **Image Classification Sample Async** – Inference of image classification networks like AlexNet and GoogLeNet using Asynchronous Inference Request API (the sample supports only images as inputs).
   - [Image Classification C++ Sample Async](../../inference-engine/samples/classification_sample_async/README.md)
   - [Image Classification Python* Sample Async](../../inference-engine/ie_bridges/python/sample/classification_sample_async/README.md)

@@ -43,7 +43,7 @@ To run the sample applications, you can use images and videos from the media fil

## Samples that Support Pre-Trained Models

To run the sample, you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).

## Build the Sample Applications

@@ -205,7 +205,7 @@ vi <user_home_directory>/.bashrc

2. Add this line to the end of the file:
   ```sh
   source /opt/intel/openvino_2021/bin/setupvars.sh
   ```

3. Save and close the file: press the **Esc** key, type `:wq` and press the **Enter** key.

@@ -242,4 +242,4 @@ sample, read the sample documentation by clicking the sample name in the samples
list above.

## See Also
* [Inference Engine Developer Guide](Deep_Learning_Inference_Engine_DevGuide.md)

@@ -66,8 +66,8 @@ Shape collision during shape propagation may be a sign that a new shape does not
Changing the model input shape may result in intermediate operations shape collision.

Examples of such operations:
- [Reshape](../ops/shape/Reshape_1.md) operation with a hard-coded output shape value
- [MatMul](../ops/matrix/MatMul_1.md) operation with the `Const` second input cannot be resized by spatial dimensions due to operation semantics

Model structure and logic should not change significantly after model reshaping.
- The Global Pooling operation is commonly used to reduce the output feature map of classification models.

@@ -100,7 +100,7 @@ Here is a code example:

@snippet snippets/ShapeInference.cpp part0

The Shape Inference feature is used in the [Smart Classroom Demo](@ref omz_demos_smart_classroom_demo_cpp).

## Extensibility

@@ -6,9 +6,9 @@ The OpenVINO™ toolkit installation includes the following tools:

|Tool | Location in the Installation Directory|
|-----------------------------------------------------------------------------|---------------------------------------|
|[Accuracy Checker Tool](@ref omz_tools_accuracy_checker) | `<INSTALL_DIR>/deployment_tools/tools/open_model_zoo/tools/accuracy_checker`|
|[Post-Training Optimization Tool](@ref pot_README) | `<INSTALL_DIR>/deployment_tools/tools/post_training_optimization_toolkit`|
|[Model Downloader](@ref omz_tools_downloader) | `<INSTALL_DIR>/deployment_tools/tools/model_downloader`|
|[Cross Check Tool](../../inference-engine/tools/cross_check_tool/README.md) | `<INSTALL_DIR>/deployment_tools/tools/cross_check_tool`|
|[Compile Tool](../../inference-engine/tools/compile_tool/README.md) | `<INSTALL_DIR>/deployment_tools/inference_engine/lib/intel64/`|

3
docs/IE_DG/img/applying_low_latency.png
Executable file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f4f6e9d35869fa2c414e58914aaec1607eb7d4768b69c0cbcce5d5fa3ceddba3
size 56444

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:83bcd7888d3843ddfd9a601288627e98f5874290c00b9988bf1beac9209f2e8d
size 79741

3
docs/IE_DG/img/low_latency_limitation_1.png
Executable file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:28f4e7ee50785e9c571725942e67c899d08e87af3802f6bea4721c64bfdb2bac
size 21722

3
docs/IE_DG/img/low_latency_limitation_2.png
Executable file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0923af3acfb69dd0b88a5edf097e60c2655828b643d8e328561b13b0196c0850
size 47997

3
docs/IE_DG/img/state_network_example.png
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9976341ca931f3ab4e4fbccea26844b738adb27b091149a4c6231eda841ab867
size 144541

@@ -1,5 +1,11 @@
# Introduction to Inference Engine {#openvino_docs_IE_DG_inference_engine_intro}

> **NOTE:** [Intel® System Studio](https://software.intel.com/en-us/system-studio) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019).

This Guide provides an overview of the Inference Engine describing the typical workflow for performing
inference of a pre-trained and optimized deep learning model and a set of sample applications.

> **NOTE:** Before you perform inference with the Inference Engine, your models should be converted to the Inference Engine format using the Model Optimizer or built directly in run-time using the nGraph API. To learn about how to use Model Optimizer, refer to the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To learn about the pre-trained and optimized models delivered with the OpenVINO™ toolkit, refer to [Pre-Trained Models](@ref omz_models_intel_index).

After you have used the Model Optimizer to create an Intermediate Representation (IR), use the Inference Engine to infer the result for a given input data.

336
docs/IE_DG/network_state_intro.md
Normal file
@@ -0,0 +1,336 @@
Introduction to OpenVINO state API {#openvino_docs_IE_DG_network_state_intro}
==============================

This section describes how to work with stateful networks in the OpenVINO toolkit, specifically:
* How stateful networks are represented in IR and nGraph
* How operations with state can be done

The section additionally provides small examples of a stateful network and code to infer it.

## What is a stateful network

Several use cases require processing of data sequences. When the length of a sequence is known and small enough,
we can process it with RNN-like networks that contain a cycle inside. But in some cases, like online speech recognition or time series
forecasting, the length of the data sequence is unknown. Then data can be divided in small portions and processed step-by-step. The dependency
between data portions should be addressed. For that, networks save some data between inferences - the state. When one dependent sequence is over,
the state should be reset to its initial value and a new sequence can be started.

Several frameworks have special APIs for states in networks. For example, Keras has the special option for RNNs `stateful` that turns on saving the state
between inferences. Kaldi contains the special specifier `Offset` to define a time offset in a network.

OpenVINO also contains a special API to simplify work with networks with states. The state is automatically saved between inferences,
and there is a way to reset the state when needed. You can also read the state or set it to some new value between inferences.

## OpenVINO state representation

OpenVINO contains the special abstraction `Variable` to represent a state in a network. There are two operations to work with the state:
* `Assign` to save a value in the state
* `ReadValue` to read the value saved on the previous iteration

You can find more details on these operations in [ReadValue specification](../ops/infrastructure/ReadValue_3.md) and
[Assign specification](../ops/infrastructure/Assign_3.md).
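
As a quick sketch (assuming an `inferRequest` created for such a network), the states exposed by these operation pairs can be listed and reset through the Inference Engine API:

```cpp
// Each ReadValue/Assign pair is reported as one variable state
auto states = inferRequest.QueryState();
for (auto& state : states) {
    std::cout << state.GetName() << std::endl;
    state.Reset();  // back to the initial value supplied to ReadValue
}
```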

## Examples of representation of a network with states

To get a model with states ready for inference, you can convert a model from another framework to IR with Model Optimizer or create an nGraph function
(details can be found in the [Build nGraph Function section](../nGraph_DG/build_function.md)).
Let's represent the following graph in both forms:
![state_network_example]

### Example of IR with state

The `bin` file for this graph should contain float 0 in binary form. The content of the `xml` file is the following.
|
||||
|
||||
```xml
|
||||
<?xml version="1.0" ?>
|
||||
<net name="summator" version="10">
|
||||
<layers>
|
||||
<layer id="0" name="init_value" type="Const" version="opset6">
|
||||
<data element_type="f32" offset="0" shape="1,1" size="4"/>
|
||||
<output>
|
||||
<port id="1" precision="FP32">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer id="1" name="read" type="ReadValue" version="opset6">
|
||||
<data variable_id="id"/>
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="1" precision="FP32">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer id="2" name="input" type="Parameter" version="opset6">
|
||||
<data element_type="f32" shape="1,1"/>
|
||||
<output>
|
||||
<port id="0" precision="FP32">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer id="3" name="add_sum" type="Add" version="opset6">
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="1">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="2" precision="FP32">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer id="4" name="save" type="Assign" version="opset6">
|
||||
<data variable_id="id"/>
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
</layer>
|
||||
<layer id="10" name="add" type="Add" version="opset6">
|
||||
<data axis="1"/>
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="1">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="2" precision="FP32">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer id="5" name="output/sink_port_0" type="Result" version="opset6">
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
</layer>
|
||||
</layers>
|
||||
<edges>
|
||||
<edge from-layer="0" from-port="1" to-layer="1" to-port="0"/>
|
||||
<edge from-layer="2" from-port="0" to-layer="3" to-port="1"/>
|
||||
<edge from-layer="1" from-port="1" to-layer="3" to-port="0"/>
|
||||
<edge from-layer="3" from-port="2" to-layer="4" to-port="0"/>
|
||||
<edge from-layer="3" from-port="2" to-layer="10" to-port="0"/>
|
||||
<edge from-layer="1" from-port="1" to-layer="10" to-port="1"/>
|
||||
<edge from-layer="10" from-port="2" to-layer="5" to-port="0"/>
|
||||
</edges>
|
||||
<meta_data>
|
||||
<MO_version value="unknown version"/>
|
||||
<cli_parameters>
|
||||
</cli_parameters>
|
||||
</meta_data>
|
||||
</net>
|
||||
```
|
||||
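Once saved to files, such an IR can be read like any other model. Below is a minimal sketch using `InferenceEngine::Core`; the file names are assumptions for illustration:

```cpp
InferenceEngine::Core core;
// Read the stateful IR; the `bin` file holds the single float 0 for init_value.
InferenceEngine::CNNNetwork network = core.ReadNetwork("summator.xml", "summator.bin");
```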
|
||||
### Example of creating a model via nGraph API
|
||||
|
||||
```cpp
|
||||
#include <ngraph/opsets/opset6.hpp>
|
||||
#include <ngraph/op/util/variable.hpp>
|
||||
// ...
|
||||
|
||||
auto arg = make_shared<ngraph::opset6::Parameter>(element::f32, Shape{1, 1});
|
||||
auto init_const = ngraph::opset6::Constant::create(element::f32, Shape{1, 1}, {0});
|
||||
|
||||
// The ReadValue/Assign operations must be used in pairs in the network.
|
||||
// For each such pair, its own variable object must be created.
|
||||
const std::string variable_name("variable0");
|
||||
auto variable = std::make_shared<ngraph::Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, variable_name});
|
||||
|
||||
// Creating ngraph::function
|
||||
auto read = make_shared<ngraph::opset6::ReadValue>(init_const, variable);
|
||||
std::vector<shared_ptr<ngraph::Node>> args = {arg, read};
|
||||
auto add = make_shared<ngraph::opset6::Add>(arg, read);
|
||||
auto assign = make_shared<ngraph::opset6::Assign>(add, variable);
|
||||
auto add2 = make_shared<ngraph::opset6::Add>(add, read);
|
||||
auto res = make_shared<ngraph::opset6::Result>(add2);
|
||||
|
||||
auto f = make_shared<Function>(ResultVector({res}), ParameterVector({arg}), SinkVector({assign}));
|
||||
```
|
||||
|
||||
In this example, `SinkVector` is used to create `ngraph::Function`. For a network with states, in addition to inputs and outputs, `Assign` nodes should also be passed to the `Function`
to avoid their removal during graph transformations. You can do it with the constructor, as shown in the example, or with the special method `add_sinks(const SinkVector& sinks)`. You can also delete
a sink from `ngraph::Function` after deleting the node from the graph, using the `delete_sink()` method, as sketched below.
|
||||
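For illustration, here is a minimal sketch of the alternative route via `add_sinks`, reusing the `res`, `arg`, and `assign` nodes from the example above:

```cpp
// Build the function without sinks first, then attach the Assign node explicitly.
auto f2 = make_shared<Function>(ResultVector({res}), ParameterVector({arg}));
f2->add_sinks(SinkVector({assign}));

// If the Assign node is later deleted from the graph, drop its sink as well:
// f2->delete_sink(assign);
```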
|
||||
## OpenVINO state API
|
||||
|
||||
Inference Engine has the `InferRequest::QueryState` method to get the list of states from a network, and the `IVariableState` interface to operate with states. A brief description of the methods
is below, and the next section contains a small workable example of how this interface can be used.
|
||||
|
||||
* `std::string GetName() const`
returns the name (variable_id) of the corresponding Variable
* `void Reset()`
resets the state to the default value
* `void SetState(Blob::Ptr newState)`
sets a new value for the state
* `Blob::CPtr GetState() const`
returns the current value of the state
|
||||
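As a quick orientation, here is a minimal sketch of how these methods fit together; it assumes an already created `InferenceEngine::InferRequest` named `inferRequest` for a stateful network:

```cpp
// Enumerate all states of the infer request and inspect or reset them.
auto states = inferRequest.QueryState();
for (auto& state : states) {
    std::string name = state.GetName();                    // variable_id of the corresponding Variable
    InferenceEngine::Blob::CPtr value = state.GetState();  // current value of the state
    state.Reset();                                         // back to the default (initial) value
}
```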
|
||||
## Example of stateful network inference
|
||||
|
||||
Let's take the IR from the previous section's example. The example below demonstrates inference of two independent sequences of data. The state should be reset between these sequences.
|
||||
|
||||
One infer request and one thread
will be used in this example. Using several threads is possible if you have several independent sequences; then each sequence can be processed in its own infer
request. Inference of one sequence in several infer requests is not recommended: in one infer request the state will be saved automatically between inferences, but
if the first step is done in one infer request and the second in another, the state should be set in the new infer request manually (using the `IVariableState::SetState` method), as sketched below.
|
||||
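For reference, a hypothetical sketch of such a manual transfer between two infer requests, `request1` and `request2` (the names are illustrative; since `GetState` returns a constant blob, its content is copied into a writable blob first):

```cpp
#include <cstring>

auto states1 = request1.QueryState();
auto states2 = request2.QueryState();
for (size_t i = 0; i < states1.size(); ++i) {
    InferenceEngine::Blob::CPtr saved = states1[i].GetState();
    // SetState expects a non-const blob, so copy the saved value into a new one.
    auto writable = InferenceEngine::make_shared_blob<float>(saved->getTensorDesc());
    writable->allocate();
    std::memcpy(writable->buffer().as<void*>(), saved->cbuffer().as<const void*>(), saved->byteSize());
    states2[i].SetState(writable);
}
```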
|
||||
@snippet openvino/docs/snippets/InferenceEngine_network_with_state_infer.cpp part1
|
||||
|
||||
You can find more powerful examples demonstrating how to work with networks with states in the speech sample and demo.
Descriptions can be found in the [Samples Overview](./Samples_Overview.md).
|
||||
|
||||
[state_network_example]: ./img/state_network_example.png
|
||||
|
||||
|
||||
## LowLatency transformation
|
||||
|
||||
If the original framework does not have a special API for working with states, the OpenVINO representation of the imported model will not contain Assign/ReadValue layers. For example, if the original ONNX model contains RNN operations, the IR will contain TensorIterator operations, and the values will be obtained only after execution of the whole TensorIterator primitive; intermediate values from each iteration will not be available. To be able to work with these intermediate values of each iteration and receive them with low latency after each infer request, a special LowLatency transformation was introduced.
|
||||
|
||||
The LowLatency transformation changes the structure of a network containing [TensorIterator](../ops/infrastructure/TensorIterator_1.md) and [Loop](../ops/infrastructure/Loop_5.md) by adding the ability to work with the state, inserting the Assign/ReadValue layers as shown in the picture below.
|
||||
|
||||

|
||||
|
||||
After applying the transformation, ReadValue operations can receive other operations as an input, as shown in the picture above. These inputs should set the initial value for initialization of ReadValue operations. However, such initialization is not supported in the current State API implementation. Input values are ignored and the initial values for the ReadValue operations are set to zeros unless otherwise specified by the user via [State API](#openvino-state-api).
|
||||
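Below is a hypothetical sketch of setting a non-zero initial value through the State API before the first inference of a sequence; `inferRequest` and the value 1.0 are assumptions for illustration:

```cpp
// Override the zero initial value of every state before starting a sequence.
auto states = inferRequest.QueryState();
for (auto& state : states) {
    auto desc = state.GetState()->getTensorDesc();
    auto blob = InferenceEngine::make_shared_blob<float>(desc);
    blob->allocate();
    std::fill_n(blob->buffer().as<float*>(), blob->size(), 1.0f);  // assumed initial value
    state.SetState(blob);
}
```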
|
||||
### Steps to apply LowLatency transformation
|
||||
|
||||
1. Get CNNNetwork. Any of the following ways is acceptable:
|
||||
|
||||
* [from IR or ONNX model](Integrate_with_customer_application_new_API.md#integration-steps)
|
||||
* [from nGraph Function](../nGraph_DG/build_function.md)
|
||||
|
||||
2. [Reshape](ShapeInference.md) the CNNNetwork if necessary.
**Necessary case:** the sequence_lengths dimension of the input > 1 means the TensorIterator layer will have number_iterations > 1. The inputs of the network should be reshaped to set sequence_dimension to exactly 1.
|
||||
|
||||
Usually, the following exception, which occurs after applying a transform when trying to infer the network in a plugin, indicates the need to apply the reshape feature: `C++ exception with description "Function is incorrect. Assign and ReadValue operations must be used in pairs in the network."`
This means that there are several pairs of Assign/ReadValue operations with the same variable_id in the network; the operations were inserted into each iteration of the TensorIterator.
|
||||
|
||||
```cpp
|
||||
|
||||
// Network before reshape: Parameter (name: X, shape: [2 (sequence_lengths), 1, 16]) -> TensorIterator (num_iteration = 2, axis = 0) -> ...
|
||||
|
||||
cnnNetwork.reshape({{"X", {1, 1, 16}}});
|
||||
|
||||
// Network after reshape: Parameter (name: X, shape: [1 (sequence_lengths), 1, 16]) -> TensorIterator (num_iteration = 1, axis = 0) -> ...
|
||||
|
||||
```
|
||||
|
||||
3. Apply LowLatency transformation
|
||||
```cpp
|
||||
#include "ie_transformations.hpp"
|
||||
|
||||
...
|
||||
|
||||
InferenceEngine::LowLatency(cnnNetwork);
|
||||
```
|
||||
**State naming rule:** the name of a state is a concatenation of names: the original TensorIterator operation, the Parameter of the body, and the suffix "variable_" + id (0-based indexing, new indexing for each TensorIterator). You can use these rules to predict the name of the inserted State after the transformation is applied. For example:
|
||||
```cpp
|
||||
// Precondition in ngraph::function.
|
||||
// Created TensorIterator and Parameter in body of TensorIterator with names
|
||||
std::string tensor_iterator_name = "TI_name";
std::string body_parameter_name = "param_name";
std::string idx = "0"; // it's the first variable in the network

// The State will be named "TI_name/param_name/variable_0"
auto state_name = tensor_iterator_name + "/" + body_parameter_name + "/" + "variable_" + idx;
|
||||
|
||||
InferenceEngine::CNNNetwork cnnNetwork = InferenceEngine::CNNNetwork{function};
|
||||
InferenceEngine::LowLatency(cnnNetwork);
|
||||
|
||||
InferenceEngine::ExecutableNetwork executableNetwork = core->LoadNetwork(/*cnnNetwork, targetDevice, configuration*/);
|
||||
|
||||
// Try to find the Variable by name
|
||||
auto states = executableNetwork.QueryState();
|
||||
for (auto& state : states) {
|
||||
auto name = state.GetName();
|
||||
if (name == state_name) {
|
||||
// some actions
|
||||
}
|
||||
}
|
||||
```
|
||||
4. Use the state API. See the sections [OpenVINO state API](#openvino-state-api) and [Example of stateful network inference](#example-of-stateful-network-inference).
|
||||
|
||||
|
||||
### Known limitations
|
||||
1. Parameters directly connected to ReadValue operations (States) are not allowed after the transformation is applied.
|
||||
|
||||
Unnecessary parameters may remain in the graph after applying the transformation. Automatic handling of this case inside the transformation is currently not possible. Such Parameters should be removed manually from `ngraph::Function` or replaced with a Constant.
|
||||
|
||||

|
||||
|
||||
**Current solutions:**
|
||||
* Replace Parameter with Constant (freeze) with the value [0, 0, 0 … 0] via [ModelOptimizer CLI](../MO_DG/prepare_model/convert_model/Converting_Model_General.md) `--input` or `--freeze_placeholder_with_value`.
|
||||
* Use ngraph API to replace Parameter with Constant.
|
||||
|
||||
```cpp
|
||||
// nGraph example. How to replace Parameter with Constant.
|
||||
auto func = cnnNetwork.getFunction();
|
||||
// Creating the new Constant with zero values.
|
||||
auto new_const = std::make_shared<ngraph::opset6::Constant>( /*type, shape, std::vector with zeros*/ );
|
||||
for (const auto& param : func->get_parameters()) {
|
||||
// Trying to find the problematic Parameter by name.
|
||||
if (param->get_friendly_name() == "param_name") {
|
||||
// Replacing the problematic Param with a Constant.
|
||||
ngraph::replace_node(param, new_const);
|
||||
// Removing problematic Parameter from ngraph::function
|
||||
func->remove_parameter(param);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. Unable to execute reshape precondition to apply the transformation correctly due to hardcoded values of shapes somewhere in the network.
|
||||
|
||||
Networks can be non-reshapable; the most common reason is that shape values are hardcoded in a Constant somewhere in the network.
|
||||
|
||||

|
||||
|
||||
**Current solution:** trim non-reshapable layers via [ModelOptimizer CLI](../MO_DG/prepare_model/convert_model/Converting_Model_General.md) `--input`, `--output`. For example, we can trim the Parameter and the problematic Constant in the picture above, using the following command line option:
|
||||
`--input Reshape_layer_name`. We can also replace the problematic Constant using ngraph, as shown in the example below.
|
||||
|
||||
```cpp
|
||||
// nGraph example. How to replace a Constant with hardcoded values of shapes in the network with another one with the new values.
|
||||
// Assume we know which Constant (const_with_hardcoded_shape) prevents the reshape from being applied.
|
||||
// Then we can find this Constant by name on the network and replace it with a new one with the correct shape.
|
||||
auto func = cnnNetwork.getFunction();
|
||||
// Creating the new Constant with a correct shape.
|
||||
// For the example shown in the picture above, the new values of the Constant should be 1, 1, 10 instead of 1, 49, 10
|
||||
auto new_const = std::make_shared<ngraph::opset6::Constant>( /*type, shape, value_with_correct_shape*/ );
|
||||
for (const auto& node : func->get_ops()) {
|
||||
// Trying to find the problematic Constant by name.
|
||||
if (node->get_friendly_name() == "name_of_non_reshapable_const") {
|
||||
auto const_with_hardcoded_shape = std::dynamic_pointer_cast<ngraph::opset6::Constant>(node);
|
||||
// Replacing the problematic Constant with a new one. Do this for all the problematic Constants in the network, then
|
||||
// you can apply the reshape feature.
|
||||
ngraph::replace_node(const_with_hardcoded_shape, new_const);
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -58,5 +58,5 @@ should be called with `weights` passed as an empty `Blob`.
|
||||
- Model Optimizer Developer Guide: [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
|
||||
- Inference Engine Developer Guide: [Inference Engine Developer Guide](Deep_Learning_Inference_Engine_DevGuide.md)
|
||||
- For more information on Sample Applications, see the [Inference Engine Samples Overview](Samples_Overview.md)
|
||||
- For information on a set of pre-trained models, see the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index)
|
||||
- For information on a set of pre-trained models, see the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_group_intel)
|
||||
- For IoT Libraries and Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit).
|
||||
|
||||
@@ -113,8 +113,8 @@ CPU-specific settings:
|
||||
| Parameter name | Parameter values | Default | Description |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| KEY_CPU_THREADS_NUM | positive integer values| 0 | Specifies the number of threads that CPU plugin should use for inference. Zero (default) means using all (logical) cores|
|
||||
| KEY_CPU_BIND_THREAD | YES/NUMA/NO | YES | Binds inference threads to CPU cores. 'YES' (default) binding option maps threads to cores - this works best for static/synthetic scenarios like benchmarks. The 'NUMA' binding is more relaxed, binding inference threads only to NUMA nodes, leaving further scheduling to specific cores to the OS. This option might perform better in the real-life/contended scenarios. Note that for the latency-oriented cases (single execution stream, see below) both YES and NUMA options limit number of inference threads to the number of hardware cores (ignoring hyper-threading) on the multi-socket machines. |
|
||||
| KEY_CPU_THROUGHPUT_STREAMS | KEY_CPU_THROUGHPUT_NUMA, KEY_CPU_THROUGHPUT_AUTO, or positive integer values| 1 | Specifies number of CPU "execution" streams for the throughput mode. Upper bound for the number of inference requests that can be executed simultaneously. All available CPU cores are evenly distributed between the streams. The default value is 1, which implies latency-oriented behavior with all available cores processing requests one by one.<br>KEY_CPU_THROUGHPUT_NUMA creates as many streams as needed to accommodate NUMA and avoid associated penalties.<br>KEY_CPU_THROUGHPUT_AUTO creates bare minimum of streams to improve the performance; this is the most portable option if you don't know how many cores your target machine has (and what would be the optimal number of streams). Note that your application should provide enough parallel slack (for example, run many inference requests) to leverage the throughput mode. <br> Non-negative integer value creates the requested number of streams. If a number of streams is 0, no internal streams are created and user threads are interpreted as stream master threads.|
|
||||
| KEY_CPU_BIND_THREAD | YES/NUMA/NO | YES | Binds inference threads to CPU cores. 'YES' (default) binding option maps threads to cores - this works best for static/synthetic scenarios like benchmarks. The 'NUMA' binding is more relaxed, binding inference threads only to NUMA nodes, leaving further scheduling to specific cores to the OS. This option might perform better in the real-life/contended scenarios. Note that for the latency-oriented cases (number of the streams is less or equal to the number of NUMA nodes, see below) both YES and NUMA options limit number of inference threads to the number of hardware cores (ignoring hyper-threading) on the multi-socket machines. |
|
||||
| KEY_CPU_THROUGHPUT_STREAMS | KEY_CPU_THROUGHPUT_NUMA, KEY_CPU_THROUGHPUT_AUTO, or positive integer values| 1 | Specifies number of CPU "execution" streams for the throughput mode. Upper bound for the number of inference requests that can be executed simultaneously. All available CPU cores are evenly distributed between the streams. The default value is 1, which implies latency-oriented behavior for single NUMA-node machine, with all available cores processing requests one by one. On the multi-socket (multiple NUMA nodes) machine, the best latency numbers usually achieved with a number of streams matching the number of NUMA-nodes. <br>KEY_CPU_THROUGHPUT_NUMA creates as many streams as needed to accommodate NUMA and avoid associated penalties.<br>KEY_CPU_THROUGHPUT_AUTO creates bare minimum of streams to improve the performance; this is the most portable option if you don't know how many cores your target machine has (and what would be the optimal number of streams). Note that your application should provide enough parallel slack (for example, run many inference requests) to leverage the throughput mode. <br> Non-negative integer value creates the requested number of streams. If a number of streams is 0, no internal streams are created and user threads are interpreted as stream master threads.|
|
||||
| KEY_ENFORCE_BF16 | YES/NO| YES | The name for the setting to execute in bfloat16 precision whenever it is possible. This option lets the plugin know to downscale the precision where it sees performance benefits from bfloat16 execution. This option does not guarantee accuracy of the network; you need to verify the accuracy in this mode separately, based on performance and accuracy results. It should be your decision whether to use this option or not. |
|
||||
|
||||
> **NOTE**: To disable all internal threading, use the following set of configuration parameters: `KEY_CPU_THROUGHPUT_STREAMS=0`, `KEY_CPU_THREADS_NUM=1`, `KEY_CPU_BIND_THREAD=NO`.
|
||||
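For reference, a minimal sketch of passing these parameters when loading a network; it assumes an `InferenceEngine::Core core` and a `CNNNetwork network` are already created:

```cpp
#include <ie_plugin_config.hpp>

// Hypothetical configuration: throughput streams picked automatically,
// inference threads bound to NUMA nodes only.
std::map<std::string, std::string> config = {
    {CONFIG_KEY(CPU_THROUGHPUT_STREAMS), CONFIG_VALUE(CPU_THROUGHPUT_AUTO)},
    {CONFIG_KEY(CPU_BIND_THREAD), CONFIG_VALUE(NUMA)}
};
auto executableNetwork = core.LoadNetwork(network, "CPU", config);
```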
|
||||
@@ -69,7 +69,7 @@ Limitations include:
|
||||
- Only 1D convolutions are natively supported.
|
||||
- The number of output channels for convolutions must be a multiple of 4.
|
||||
- Permute layer support is limited to the cases where no data reordering is needed or when reordering is happening for two dimensions, at least one of which is not greater than 8.
|
||||
- Concatenations and splits are supported only along the channel dimension (axis=1).
|
||||
- Splits and concatenations are supported for continuous portions of memory (e.g., split of 1,2,3,4 to 1,1,3,4 and 1,1,3,4 or concats of 1,2,3,4 and 1,2,3,5 to 2,2,3,4).
|
||||
|
||||
#### Experimental Support for 2D Convolutions
|
||||
|
||||
@@ -77,7 +77,7 @@ The Intel® GNA hardware natively supports only 1D convolution.
|
||||
|
||||
However, 2D convolutions can be mapped to 1D when a convolution kernel moves in a single direction. GNA Plugin performs such a transformation for Kaldi `nnet1` convolution. From this perspective, the Intel® GNA hardware convolution operation accepts an `NHWC` input and produces an `NHWC` output. Because OpenVINO™ only supports the `NCHW` layout, you may need to insert `Permute` layers before or after convolutions.
|
||||
|
||||
For example, the Kaldi model optimizer inserts such a permute after convolution for the [rm_cnn4a network](https://download.01.org/openvinotoolkit/models_contrib/speech/kaldi/rm_cnn4a_smbr/). This `Permute` layer is automatically removed by the GNA Plugin, because the Intel® GNA hardware convolution layer already produces the required `NHWC` result.
|
||||
For example, the Kaldi model optimizer inserts such a permute after convolution for the [rm_cnn4a network](https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/rm_cnn4a_smbr/). This `Permute` layer is automatically removed by the GNA Plugin, because the Intel® GNA hardware convolution layer already produces the required `NHWC` result.
|
||||
|
||||
## Operation Precision
|
||||
|
||||
|
||||
@@ -92,11 +92,20 @@ Notice that until R2 you had to calculate number of requests in your application
|
||||
Notice that every OpenVINO sample that supports the "-d" (which stands for "device") command-line option transparently accepts the multi-device.
|
||||
The [Benchmark Application](../../../inference-engine/samples/benchmark_app/README.md) is the best reference to the optimal usage of the multi-device. As discussed multiple times earlier, you don't need to set up the number of requests, CPU streams, or threads, as the application provides optimal out-of-the-box performance.
|
||||
Below is an example command line to evaluate HDDL+GPU performance with that:
```sh
./benchmark_app -d MULTI:HDDL,GPU -m <model> -i <input> -niter 1000
```
|
||||
Notice that you can use the FP16 IR to work with the multi-device, as the CPU automatically upconverts it to fp32 and the rest of the devices support it naturally.
|
||||
Also notice that no demos are (yet) fully optimized for the multi-device, by means of supporting the OPTIMAL_NUMBER_OF_INFER_REQUESTS metric, using the GPU streams/throttling, and so on.
|
||||
|
||||
## Video: MULTI Plugin
|
||||
[](https://www.youtube.com/watch?v=xbORYFEmrqU)
|
||||
\htmlonly
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/xbORYFEmrqU" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
\endhtmlonly
|
||||
|
||||
## See Also
|
||||
* [Supported Devices](Supported_Devices.md)
|
||||
|
||||
|
||||
|
||||
@@ -16,6 +16,8 @@ The Inference Engine provides unique capabilities to infer deep learning models
|
||||
|[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel® devices in parallel |
|
||||
|[Heterogeneous plugin](HETERO.md) |Heterogeneous plugin enables automatic inference splitting between several Intel® devices (for example if a device doesn't [support certain layers](#supported-layers)). |
|
||||
|
||||
Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/).
|
||||
|
||||
## Supported Configurations
|
||||
|
||||
The Inference Engine can run inference on models in different formats, with various input and output formats.
|
||||
@@ -88,7 +90,7 @@ the supported output precision depends on the actual underlying devices. _Gener
|
||||
|:-------------|:------------:|:------------:|:------------:|:------------:|
|
||||
|CPU plugin |Supported |Supported |Supported |Supported |
|
||||
|GPU plugin |Supported |Supported |Supported |Supported |
|
||||
|VPU plugins |Not supported |Supported |Supported |Supported |
|
||||
|VPU plugins |Supported |Supported |Supported |Supported |
|
||||
|GNA plugin |Not supported |Supported |Supported |Supported |
|
||||
|
||||
### Supported Output Layout
|
||||
@@ -111,9 +113,9 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| Acosh | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Activation-Clamp | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Activation-ELU | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Activation-Exp | Supported |Supported\*\*\*| Not Supported | Supported | Supported |
|
||||
| Activation-Exp | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Activation-Leaky ReLU | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Activation-Not | Supported |Supported\*\*\*| Not Supported | Not Supported | Supported |
|
||||
| Activation-Not | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Activation-PReLU | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Activation-ReLU | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Activation-ReLU6 | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
@@ -127,7 +129,7 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| BatchNormalization | Supported | Supported | Supported | Not Supported | Supported |
|
||||
| BinaryConvolution | Supported | Supported | Not Supported | Not Supported | Supported |
|
||||
| Broadcast | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Ceil | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Ceil | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Concat | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Const | Supported | Supported | Supported | Supported | Not Supported |
|
||||
| Convolution-Dilated | Supported | Supported | Supported | Not Supported | Supported |
|
||||
@@ -145,8 +147,8 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| DeformableConvolution | Supported | Supported | Not Supported | Not Supported | Supported |
|
||||
| DepthToSpace | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| DetectionOutput | Supported | Supported\*\* | Supported\* | Not Supported | Supported |
|
||||
| Eltwise-And | Supported |Supported\*\*\*| Not Supported | Not Supported | Supported |
|
||||
| Eltwise-Add | Supported |Supported\*\*\*| Not Supported | Not Supported | Supported |
|
||||
| Eltwise-And | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Eltwise-Add | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Eltwise-Div | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Eltwise-Equal | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Eltwise-FloorMod | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
@@ -166,12 +168,12 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| Eltwise-SquaredDiff | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Eltwise-Sub | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Eltwise-Sum | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Erf | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Exp | Supported | Supported | Not Supported | Supported | Supported |
|
||||
| Erf | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Exp | Supported | Supported | Supported | Supported | Supported |
|
||||
| FakeQuantize | Not Supported | Supported | Not Supported | Not Supported | Supported |
|
||||
| Fill | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Flatten | Supported | Supported | Supported | Not Supported | Supported |
|
||||
| Floor | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Floor | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| FullyConnected (Inner Product) | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Gather | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| GatherTree | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
@@ -191,9 +193,9 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| Memory | Not Supported | Supported | Not Supported | Supported | Supported |
|
||||
| MVN | Supported | Supported\*\* | Supported\* | Not Supported | Supported |
|
||||
| Neg | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| NonMaxSuppression | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| NonMaxSuppression | Not Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Normalize | Supported | Supported\*\* | Supported\* | Not Supported | Supported |
|
||||
| OneHot | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| OneHot | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Pad | Supported | Supported\*\* | Supported\* | Not Supported | Supported |
|
||||
| Permute | Supported | Supported | Supported | Supported\* | Supported |
|
||||
| Pooling(AVG,MAX) | Supported | Supported | Supported | Supported | Supported |
|
||||
@@ -206,17 +208,17 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| PSROIPooling | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Range | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Reciprocal | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceAnd | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceAnd | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| ReduceL1 | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceL2 | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceLogSum | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceLogSumExp | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceMax | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceMean | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceMin | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceMax | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| ReduceMean | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| ReduceMin | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| ReduceOr | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceProd | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceSum | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ReduceSum | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| ReduceSumSquare | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| RegionYolo | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| ReorgYolo | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
@@ -226,7 +228,7 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| RNN | Not Supported | Supported | Supported | Not Supported | Not Supported |
|
||||
| ROIPooling | Supported\* | Supported | Supported | Not Supported | Supported |
|
||||
| ScaleShift | Supported |Supported\*\*\*| Supported\* | Supported | Supported |
|
||||
| ScatterUpdate | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ScatterUpdate | Not Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Select | Supported | Supported | Supported | Not Supported | Supported |
|
||||
| Selu | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| ShuffleChannels | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
@@ -236,17 +238,17 @@ The following layers are supported by the plugins and by [Shape Inference featur
|
||||
| SimplerNMS | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Slice | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| SoftMax | Supported |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| Softplus | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Softplus | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Softsign | Supported | Supported\*\* | Not Supported | Supported | Supported |
|
||||
| SpaceToDepth | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| SpatialTransformer | Not Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| Split | Supported |Supported\*\*\*| Supported | Supported | Supported |
|
||||
| Squeeze | Supported | Supported\*\* | Supported | Supported | Supported |
|
||||
| StridedSlice | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| StridedSlice | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Tan | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| TensorIterator | Not Supported | Supported | Supported | Supported | Not Supported |
|
||||
| Tile | Supported\*\* |Supported\*\*\*| Supported | Not Supported | Supported |
|
||||
| TopK | Supported | Supported\*\* | Not Supported | Not Supported | Supported |
|
||||
| TopK | Supported | Supported\*\* | Supported | Not Supported | Supported |
|
||||
| Unpooling | Supported | Not Supported | Not Supported | Not Supported | Not Supported |
|
||||
| Unsqueeze | Supported | Supported\*\* | Supported | Supported | Supported |
|
||||
| Upsampling | Supported | Not Supported | Not Supported | Not Supported | Not Supported |
|
||||
|
||||
@@ -93,7 +93,11 @@ Model Optimizer produces an Intermediate Representation (IR) of the network, whi
|
||||
* [Converting Your ONNX* Model](prepare_model/convert_model/Convert_Model_From_ONNX.md)
|
||||
* [Converting Faster-RCNN ONNX* Model](prepare_model/convert_model/onnx_specific/Convert_Faster_RCNN.md)
|
||||
* [Converting Mask-RCNN ONNX* Model](prepare_model/convert_model/onnx_specific/Convert_Mask_RCNN.md)
|
||||
* [Converting DLRM ONNX* Model](prepare_model/convert_model/onnx_specific/Convert_DLRM.md)
|
||||
* [Converting GPT2 ONNX* Model](prepare_model/convert_model/onnx_specific/Convert_GPT2.md)
|
||||
* [Converting Your PyTorch* Model](prepare_model/convert_model/Convert_Model_From_PyTorch.md)
|
||||
* [Converting F3Net PyTorch* Model](prepare_model/convert_model/pytorch_specific/Convert_F3Net.md)
|
||||
* [Converting QuartzNet PyTorch* Model](prepare_model/convert_model/pytorch_specific/Convert_QuartzNet.md)
|
||||
* [Converting YOLACT PyTorch* Model](prepare_model/convert_model/pytorch_specific/Convert_YOLACT.md)
|
||||
* [Model Optimizations Techniques](prepare_model/Model_Optimization_Techniques.md)
|
||||
* [Cutting parts of the model](prepare_model/convert_model/Cutting_Model.md)
|
||||
* [Sub-graph Replacement in Model Optimizer](prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md)
|
||||
@@ -111,3 +115,22 @@ Model Optimizer produces an Intermediate Representation (IR) of the network, whi
|
||||
* [Known Issues](Known_Issues_Limitations.md)
|
||||
|
||||
**Typical Next Step:** [Preparing and Optimizing your Trained Model with Model Optimizer](prepare_model/Prepare_Trained_Model.md)
|
||||
|
||||
## Video: Model Optimizer Concept
|
||||
|
||||
[](https://www.youtube.com/watch?v=Kl1ptVb7aI8)
|
||||
\htmlonly
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/Kl1ptVb7aI8" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
\endhtmlonly
|
||||
|
||||
## Video: Model Optimizer Basic Operation
|
||||
[](https://www.youtube.com/watch?v=BBt1rseDcy0)
|
||||
\htmlonly
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/BBt1rseDcy0" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
\endhtmlonly
|
||||
|
||||
## Video: Choosing the Right Precision
|
||||
[](https://www.youtube.com/watch?v=RF8ypHyiKrY)
|
||||
\htmlonly
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/RF8ypHyiKrY" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
\endhtmlonly
|
||||
|
||||
@@ -255,6 +255,89 @@ Standard TensorFlow\* operations:
|
||||
| ZerosLike | No |
|
||||
|
||||
|
||||
## TensorFlow 2 Keras\* Supported Operations
|
||||
|
||||
Standard TensorFlow 2 Keras\* operations:
|
||||
|
||||
| Operation Name in TensorFlow 2 Keras\* | Limitations|
|
||||
| :----------| :----------|
|
||||
| ActivityRegularization | No |
|
||||
| Add | No |
|
||||
| AdditiveAttention | No |
|
||||
| AlphaDropout | No |
|
||||
| Attention | No |
|
||||
| Average | No |
|
||||
| AveragePooling1D | No |
|
||||
| AveragePooling2D | No |
|
||||
| AveragePooling3D | No |
|
||||
| BatchNormalization | No |
|
||||
| Bidirectional | No |
|
||||
| Concatenate | No |
|
||||
| Conv1D | No |
|
||||
| Conv1DTranspose | Not supported if dilation is not equal to 1 |
|
||||
| Conv2D | No |
|
||||
| Conv2DTranspose | No |
|
||||
| Conv3D | No |
|
||||
| Conv3DTranspose | No |
|
||||
| Cropping1D | No |
|
||||
| Cropping2D | No |
|
||||
| Cropping3D | No |
|
||||
| Dense | No |
|
||||
| DenseFeatures | Not supported for categorical and crossed features |
|
||||
| DepthwiseConv2D | No |
|
||||
| Dot | No |
|
||||
| Dropout | No |
|
||||
| ELU | No |
|
||||
| Embedding | No |
|
||||
| Flatten | No |
|
||||
| GRU | No |
|
||||
| GRUCell | No |
|
||||
| GaussianDropout | No |
|
||||
| GaussianNoise | No |
|
||||
| GlobalAveragePooling1D | No |
|
||||
| GlobalAveragePooling2D | No |
|
||||
| GlobalAveragePooling3D | No |
|
||||
| GlobalMaxPool1D | No |
|
||||
| GlobalMaxPool2D | No |
|
||||
| GlobalMaxPool3D | No |
|
||||
| LSTM | No |
|
||||
| LSTMCell | No |
|
||||
| Lambda | No |
|
||||
| LayerNormalization | No |
|
||||
| LeakyReLU | No |
|
||||
| LocallyConnected1D | No |
|
||||
| LocallyConnected2D | No |
|
||||
| MaxPool1D | No |
|
||||
| MaxPool2D | No |
|
||||
| MaxPool3D | No |
|
||||
| Maximum | No |
|
||||
| Minimum | No |
|
||||
| Multiply | No |
|
||||
| PReLU | No |
|
||||
| Permute | No |
|
||||
| RNN | Not supported for some custom cells |
|
||||
| ReLU | No |
|
||||
| RepeatVector | No |
|
||||
| Reshape | No |
|
||||
| SeparableConv1D | No |
|
||||
| SeparableConv2D | No |
|
||||
| SimpleRNN | No |
|
||||
| SimpleRNNCell | No |
|
||||
| Softmax | No |
|
||||
| SpatialDropout1D | No |
|
||||
| SpatialDropout2D | No |
|
||||
| SpatialDropout3D | No |
|
||||
| StackedRNNCells | No |
|
||||
| Subtract | No |
|
||||
| ThresholdedReLU | No |
|
||||
| TimeDistributed | No |
|
||||
| UpSampling1D | No |
|
||||
| UpSampling2D | No |
|
||||
| UpSampling3D | No |
|
||||
| ZeroPadding1D | No |
|
||||
| ZeroPadding2D | No |
|
||||
| ZeroPadding3D | No |
|
||||
|
||||
## Kaldi\* Supported Layers
|
||||
|
||||
Standard Kaldi\* Layers:
|
||||
|
||||
@@ -23,20 +23,10 @@
|
||||
| VGG19 | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/vgg19.tar.gz) |
|
||||
| zfnet512 | [model archive](https://s3.amazonaws.com/download.onnx/models/opset_8/zfnet512.tar.gz) |
|
||||
| GPT-2 | [model archive](https://github.com/onnx/models/blob/master/text/machine_comprehension/gpt-2/model/gpt2-10.tar.gz) |
|
||||
| YOLOv3 | [model archive](https://github.com/onnx/models/blob/master/vision/object_detection_segmentation/yolov3/model/yolov3-10.tar.gz) |
|
||||
|
||||
The listed models are built with operation set version 8, except for the GPT-2 model. Models that are upgraded to higher operation set versions may not be supported.
|
||||
|
||||
## Supported PyTorch* Models via ONNX Conversion
Starting from the 2019R4 release, the OpenVINO™ toolkit officially supports public PyTorch* models (from the `torchvision` 0.2.1 and `pretrainedmodels` 0.7.4 packages) via ONNX conversion.
|
||||
The list of supported topologies is presented below:
|
||||
|
||||
|Package Name|Supported Models|
|
||||
|:----|:----|
|
||||
| [Torchvision Models](https://pytorch.org/docs/stable/torchvision/index.html) | alexnet, densenet121, densenet161, densenet169, densenet201, resnet101, resnet152, resnet18, resnet34, resnet50, vgg11, vgg13, vgg16, vgg19 |
|
||||
| [Pretrained Models](https://github.com/Cadene/pretrained-models.pytorch) | alexnet, fbresnet152, resnet101, resnet152, resnet18, resnet34, resnet152, resnet18, resnet34, resnet50, resnext101_32x4d, resnext101_64x4d, vgg11 |
|
||||
| [ESPNet Models](https://github.com/sacmehta/ESPNet/tree/master/pretrained) | |
|
||||
| [MobileNetV3](https://github.com/d-li14/mobilenetv3.pytorch) | |
|
||||
|
||||
## Supported PaddlePaddle* Models via ONNX Conversion
|
||||
Starting from the R5 release, the OpenVINO™ toolkit officially supports public PaddlePaddle* models via ONNX conversion.
|
||||
The list of supported topologies downloadable from PaddleHub is presented below:
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
# Converting a PyTorch* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_PyTorch}
|
||||
|
||||
The PyTorch\* framework is supported through export to the ONNX\* format. A summary of the steps for optimizing and deploying a model that was trained with the PyTorch\* framework:
|
||||
|
||||
1. [Export PyTorch model to ONNX\*](#export-to-onnx).
|
||||
2. [Configure the Model Optimizer](../Config_Model_Optimizer.md) for ONNX\*.
|
||||
3. [Convert an ONNX\* model](Convert_Model_From_ONNX.md) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values.
|
||||
4. Test the model in the Intermediate Representation format using the [Inference Engine](../../../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) in the target environment via provided [sample applications](../../../IE_DG/Samples_Overview.md).
|
||||
5. [Integrate](../../../IE_DG/Samples_Overview.md) the Inference Engine in your application to deploy the model in the target environment.
|
||||
|
||||
## Supported Topologies
|
||||
|
||||
Here is the list of models that were tested and are guaranteed to be supported.
|
||||
It is not a full list of models that can be converted to ONNX\* and to IR.
|
||||
|
||||
|Package Name|Supported Models|
|
||||
|:----|:----|
|
||||
| [Torchvision Models](https://pytorch.org/docs/stable/torchvision/index.html) | alexnet, densenet121, densenet161, densenet169, densenet201, resnet101, resnet152, resnet18, resnet34, resnet50, vgg11, vgg13, vgg16, vgg19 |
|
||||
| [Pretrained Models](https://github.com/Cadene/pretrained-models.pytorch) | alexnet, fbresnet152, resnet101, resnet152, resnet18, resnet34, resnet152, resnet18, resnet34, resnet50, resnext101_32x4d, resnext101_64x4d, vgg11 |
|
||||
|
||||
**Other supported topologies**
|
||||
|
||||
* [ESPNet Models](https://github.com/sacmehta/ESPNet/tree/master/pretrained)
|
||||
* [MobileNetV3](https://github.com/d-li14/mobilenetv3.pytorch)
|
||||
* F3Net topology can be converted using [Convert PyTorch\* F3Net to the IR](pytorch_specific/Convert_F3Net.md) instruction.
|
||||
* QuartzNet topologies from [NeMo project](https://github.com/NVIDIA/NeMo) can be converted using [Convert PyTorch\* QuartzNet to the IR](pytorch_specific/Convert_QuartzNet.md) instruction.
|
||||
* YOLACT topology can be converted using [Convert PyTorch\* YOLACT to the IR](pytorch_specific/Convert_YOLACT.md) instruction.
|
||||
|
||||
## Export PyTorch\* Model to ONNX\* Format <a name="export-to-onnx"></a>
|
||||
|
||||
PyTorch models are defined in Python\* code; to export such models, use the `torch.onnx.export()` method.
Only the basics are covered here; the step of exporting to ONNX\* is crucial, but it is covered by the PyTorch\* framework.
For more information, please refer to the [PyTorch\* documentation](https://pytorch.org/docs/stable/onnx.html).
|
||||
|
||||
To export a PyTorch\* model, you need to obtain the model as an instance of the `torch.nn.Module` class and call the `export` function.
|
||||
```python
|
||||
import torch
|
||||
|
||||
# Instantiate your model. This is just a regular PyTorch model that will be exported in the following steps.
|
||||
model = SomeModel()
|
||||
# Put the model into evaluation mode to switch some operations from training behavior to inference behavior.
|
||||
model.eval()
|
||||
# Create dummy input for the model. It will be used to run the model inside export function.
|
||||
dummy_input = torch.randn(1, 3, 224, 224)
|
||||
# Call the export function
|
||||
torch.onnx.export(model, (dummy_input, ), 'model.onnx')
|
||||
```
|
||||
|
||||
## Known Issues
|
||||
|
||||
* Not all PyTorch\* operations can be exported to ONNX\* opset 9, which is used by default as of version 1.8.1.
It is recommended to export models to opset 11 or higher when export to the default opset 9 is not working. In that case, use the `opset_version`
option of `torch.onnx.export`. For more information about ONNX* opsets, refer to the [Operator Schemas](https://github.com/onnx/onnx/blob/master/docs/Operators.md).
|
||||
@@ -37,7 +37,7 @@ Detailed information on how to convert models from the <a href="https://github.c
|
||||
|
||||
**Supported Frozen Topologies from TensorFlow Object Detection Models Zoo**
|
||||
|
||||
Detailed information on how to convert models from the <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md">Object Detection Models Zoo</a> is available in the [Converting TensorFlow Object Detection API Models](tf_specific/Convert_Object_Detection_API_Models.md) chapter. The table below contains models from the Object Detection Models zoo that are supported.
|
||||
Detailed information on how to convert models from the <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md">Object Detection Models Zoo</a> is available in the [Converting TensorFlow Object Detection API Models](tf_specific/Convert_Object_Detection_API_Models.md) chapter. The table below contains models from the Object Detection Models zoo that are supported.
|
||||
|
||||
| Model Name| TensorFlow Object Detection API Models (Frozen)|
|
||||
| :------------- | -----:|
|
||||
@@ -115,6 +115,7 @@ Where `HEIGHT` and `WIDTH` are the input images height and width for which the m
|
||||
| Keras-TCN | [Repo](https://github.com/philipperemy/keras-tcn) |
|
||||
| PRNet | [Repo](https://github.com/YadiraF/PRNet) |
|
||||
| YOLOv4 | [Repo](https://github.com/Ma-Dan/keras-yolo4) |
|
||||
| STN | [Repo](https://github.com/oarriaga/STN.keras) |
|
||||
|
||||
* YOLO topologies from DarkNet* can be converted using the [instruction](tf_specific/Convert_YOLO_From_Tensorflow.md).
* FaceNet topologies can be converted using the [instruction](tf_specific/Convert_FaceNet_From_Tensorflow.md).
|
||||
@@ -342,11 +343,9 @@ model = tf.keras.models.load_model('model.h5', custom_objects={'CustomLayer': Cu
|
||||
tf.saved_model.save(model,'model')
|
||||
```
|
||||
|
||||
Then follow the above instructions for the SavedModel format.
|
||||
|
||||
> **NOTE:** Do not use other hacks to resave TensorFlow* 2 models into TensorFlow* 1 formats.
|
||||
|
||||
> **NOTE**: Currently, OpenVINO™ support for TensorFlow* 2 models is in preview (aka Beta), which means it is limited and not yet of production quality. OpenVINO™ does not support models with Keras RNN and Embedding layers.
|
||||
|
||||
|
||||
## Custom Layer Definition
|
||||
@@ -360,7 +359,7 @@ See [Custom Layers in the Model Optimizer](../customize_model_optimizer/Customiz
|
||||
* Custom layer implementation details
|
||||
|
||||
|
||||
## Supported TensorFlow\* Layers
|
||||
## Supported TensorFlow\* and TensorFlow 2 Keras\* Layers
|
||||
Refer to [Supported Framework Layers](../Supported_Frameworks_Layers.md) for the list of supported standard layers.
|
||||
|
||||
|
||||
@@ -368,6 +367,11 @@ Refer to [Supported Framework Layers ](../Supported_Frameworks_Layers.md) for th
|
||||
|
||||
The Model Optimizer provides explanatory messages if it is unable to run to completion due to issues like typographical errors, incorrectly used options, or other issues. The message describes the potential cause of the problem and gives a link to the [Model Optimizer FAQ](../Model_Optimizer_FAQ.md). The FAQ has instructions on how to resolve most issues. The FAQ also includes links to relevant sections in the Model Optimizer Developer Guide to help you understand what went wrong.
|
||||
|
||||
## Video: Converting a TensorFlow Model
|
||||
[](https://www.youtube.com/watch?v=QW6532LtiTc)
|
||||
\htmlonly
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/QW6532LtiTc" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
\endhtmlonly
|
||||
|
||||
## Summary
|
||||
In this document, you learned:
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
# Convert PyTorch* F3Net to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net}
|
||||
|
||||
[F3Net](https://github.com/weijun88/F3Net): Fusion, Feedback and Focus for Salient Object Detection
|
||||
|
||||
## Download and Convert the Model to ONNX*
|
||||
|
||||
To download the pretrained model or train the model yourself, refer to the
[instructions](https://github.com/weijun88/F3Net/blob/master/README.md) in the F3Net model repository. First,
convert the model to ONNX\* format. Create and run the script with the following content in the `src`
directory of the model repository:
|
||||
```python
|
||||
import torch
|
||||
|
||||
from dataset import Config
|
||||
from net import F3Net
|
||||
|
||||
cfg = Config(mode='test', snapshot=<path_to_checkpoint_dir>)
|
||||
net = F3Net(cfg)
|
||||
image = torch.zeros([1, 3, 352, 352])
|
||||
torch.onnx.export(net, image, 'f3net.onnx', export_params=True, do_constant_folding=True, opset_version=11)
|
||||
```
|
||||
The script generates the ONNX\* model file `f3net.onnx`. The model conversion was tested with the repository hash commit `eecace3adf1e8946b571a4f4397681252f9dc1b8`.
|
||||
|
||||
## Convert ONNX* F3Net Model to IR
|
||||
|
||||
```sh
|
||||
./mo.py --input_model <MODEL_DIR>/f3net.onnx
|
||||
```
|
||||
@@ -0,0 +1,32 @@
|
||||
# Convert PyTorch* QuartzNet to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_QuartzNet}
|
||||
|
||||
[NeMo project](https://github.com/NVIDIA/NeMo) provides the QuartzNet model.
|
||||
|
||||
## Download the Pre-Trained QuartzNet Model
|
||||
|
||||
To download the pre-trained model, refer to the [NeMo Speech Models Catalog](https://ngc.nvidia.com/catalog/models/nvidia:nemospeechmodels).
|
||||
Here are the instructions on how to obtain QuartzNet in ONNX* format.
|
||||
```python
|
||||
import nemo
|
||||
import nemo.collections.asr as nemo_asr
|
||||
|
||||
quartznet = nemo_asr.models.ASRConvCTCModel.from_pretrained(model_info='QuartzNet15x5-En')
|
||||
# Export QuartzNet model to ONNX* format
|
||||
quartznet.export('qn.onnx')
|
||||
```
|
||||
This code produces 3 ONNX* model files: `encoder_qt.onnx`, `decoder_qt.onnx`, `qn.onnx`.
They are the `encoder`, the `decoder`, and a combined `decoder(encoder(x))` model, respectively.
|
||||
|
||||
## Convert ONNX* QuartzNet model to IR
|
||||
|
||||
If using a combined model:
|
||||
```sh
|
||||
./mo.py --input_model <MODEL_DIR>/qn.onnx --input_shape [B,64,X]
|
||||
```
|
||||
If using separate models:
|
||||
```sh
|
||||
./mo.py --input_model <MODEL_DIR>/encoder_qt.onnx --input_shape [B,64,X]
|
||||
./mo.py --input_model <MODEL_DIR>/decoder_qt.onnx --input_shape [B,1024,Y]
|
||||
```
|
||||
|
||||
Where the shape is determined by the audio file Mel-Spectrogram length: B - batch dimension, X - dimension based on the input length, Y - determined by the encoder output, usually `X / 2`.
|
||||
@@ -0,0 +1,107 @@
|
||||
# Convert PyTorch\* RNN-T Model to the Intermediate Representation (IR) {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT}
|
||||
|
||||
This instruction covers conversion of the RNN-T model from the [MLCommons](https://github.com/mlcommons) repository. Follow
the steps below to export a PyTorch* model into ONNX* before converting it to IR:
|
||||
|
||||
**Step 1**. Clone the RNN-T PyTorch implementation from the MLCommons repository (revision r1.0). Make a shallow clone to pull
only the RNN-T model without the full repository. If you already have a full repository, skip this step and go to **Step 2**:
|
||||
```bash
|
||||
git clone -b r1.0 -n https://github.com/mlcommons/inference rnnt_for_openvino --depth 1
|
||||
cd rnnt_for_openvino
|
||||
git checkout HEAD speech_recognition/rnnt
|
||||
```
|
||||
|
||||
**Step 2**. If you already have a full clone of the MLCommons inference repository, create a folder for the
pretrained PyTorch model, where the conversion into IR will take place. You will also need to specify the path to
your full clone at **Step 5**. Skip this step if you have a shallow clone.
|
||||
|
||||
```bash
|
||||
mkdir rnnt_for_openvino
|
||||
cd rnnt_for_openvino
|
||||
```
|
||||
|
||||
**Step 3**. Download the pretrained weights for the PyTorch implementation from https://zenodo.org/record/3662521#.YG21DugzZaQ.
For UNIX*-like systems, you can use `wget`:
|
||||
```bash
|
||||
wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt
|
||||
```
|
||||
The link was taken from `setup.sh` in the `speech_recognition/rnnt` subfolder. You will get exactly the same weights as
if you were following the steps from https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt.
|
||||
|
||||
**Step 4**. Install the required Python* packages:
|
||||
```bash
|
||||
pip3 install torch toml
|
||||
```
|
||||
|
||||
**Step 5**. Export RNN-T model into ONNX with the script below. Copy the code below into a file named
|
||||
`export_rnnt_to_onnx.py` and run it in the current directory `rnnt_for_openvino`:
|
||||
|
||||
> **NOTE**: If you already have a full clone of MLCommons inference repository, you need to
|
||||
> specify `mlcommons_inference_path` variable.
|
||||
|
||||
```python
|
||||
import toml
|
||||
import torch
|
||||
import sys
|
||||
|
||||
|
||||
def load_and_migrate_checkpoint(ckpt_path):
|
||||
checkpoint = torch.load(ckpt_path, map_location="cpu")
|
||||
migrated_state_dict = {}
|
||||
for key, value in checkpoint['state_dict'].items():
|
||||
key = key.replace("joint_net", "joint.net")
|
||||
migrated_state_dict[key] = value
|
||||
del migrated_state_dict["audio_preprocessor.featurizer.fb"]
|
||||
del migrated_state_dict["audio_preprocessor.featurizer.window"]
|
||||
return migrated_state_dict
|
||||
|
||||
|
||||
mlcommons_inference_path = './' # specify relative path for MLCommons inferene
|
||||
checkpoint_path = 'DistributedDataParallel_1576581068.9962234-epoch-100.pt'
|
||||
config_toml = 'speech_recognition/rnnt/pytorch/configs/rnnt.toml'
|
||||
config = toml.load(config_toml)
|
||||
rnnt_vocab = config['labels']['labels']
|
||||
sys.path.insert(0, mlcommons_inference_path + 'speech_recognition/rnnt/pytorch')
|
||||
|
||||
from model_separable_rnnt import RNNT
|
||||
|
||||
model = RNNT(config['rnnt'], len(rnnt_vocab) + 1, feature_config=config['input_eval'])
|
||||
model.load_state_dict(load_and_migrate_checkpoint(checkpoint_path))
|
||||
|
||||
seq_length, batch_size, feature_length = 157, 1, 240
|
||||
inp = torch.randn([seq_length, batch_size, feature_length])
|
||||
feature_length = torch.LongTensor([seq_length])
|
||||
x_padded, x_lens = model.encoder(inp, feature_length)
|
||||
torch.onnx.export(model.encoder, (inp, feature_length), "rnnt_encoder.onnx", opset_version=12,
|
||||
input_names=['input.1', '1'], dynamic_axes={'input.1': {0: 'seq_len', 1: 'batch'}})
|
||||
|
||||
symbol = torch.LongTensor([[20]])
|
||||
hidden = torch.randn([2, batch_size, 320]), torch.randn([2, batch_size, 320])
|
||||
g, hidden = model.prediction.forward(symbol, hidden)
|
||||
torch.onnx.export(model.prediction, (symbol, hidden), "rnnt_prediction.onnx", opset_version=12,
|
||||
input_names=['input.1', '1', '2'],
|
||||
dynamic_axes={'input.1': {0: 'batch'}, '1': {1: 'batch'}, '2': {1: 'batch'}})
|
||||
|
||||
f = torch.randn([batch_size, 1, 1024])
|
||||
model.joint.forward(f, g)
|
||||
torch.onnx.export(model.joint, (f, g), "rnnt_joint.onnx", opset_version=12,
|
||||
input_names=['0', '1'], dynamic_axes={'0': {0: 'batch'}, '1': {0: 'batch'}})
|
||||
```
|
||||
|
||||
```bash
|
||||
python3 export_rnnt_to_onnx.py
|
||||
```
|
||||
|
||||
After completing this step, the files rnnt_encoder.onnx, rnnt_prediction.onnx, and rnnt_joint.onnx will be saved in
|
||||
the current directory.
|
||||
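
Optionally, you can verify that the exported models are well-formed before conversion by running the ONNX checker on each file (this check is not part of the original flow and assumes the `onnx` package is installed):

```python
# Optional sanity check of the exported models with the ONNX checker.
import onnx

for name in ("rnnt_encoder.onnx", "rnnt_prediction.onnx", "rnnt_joint.onnx"):
    onnx.checker.check_model(onnx.load(name))  # raises an exception if a model is malformed
    print(name, "is a valid ONNX model")
```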

**Step 6**. Run the conversion commands:

```bash
python3 {path_to_openvino}/mo.py --input_model rnnt_encoder.onnx --input "input.1[157 1 240],1->157"
python3 {path_to_openvino}/mo.py --input_model rnnt_prediction.onnx --input "input.1[1 1],1[2 1 320],2[2 1 320]"
python3 {path_to_openvino}/mo.py --input_model rnnt_joint.onnx --input "0[1 1 1024],1[1 1 320]"
```
Please note that the hardcoded value for the sequence length (157) was taken from MLCommons, but conversion to IR preserves
network [reshapeability](../../../../IE_DG/ShapeInference.md); this means you can change input shapes manually to any value either during conversion or
inference.
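
As a sketch of reshaping at inference time (assuming the 2021.x Inference Engine Python API and the IR files produced above), the converted encoder can be re-sized to a different sequence length like this:

```python
# Hypothetical sketch: reshape the converted encoder IR to a new sequence
# length using the Inference Engine Python API (2021.x).
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="rnnt_encoder.xml", weights="rnnt_encoder.bin")
net.reshape({"input.1": (200, 1, 240)})  # new sequence length of 200 frames
exec_net = ie.load_network(network=net, device_name="CPU")
```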

@@ -0,0 +1,188 @@

# Convert PyTorch* YOLACT to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT}

You Only Look At CoefficienTs (YOLACT) is a simple, fully convolutional model for real-time instance segmentation.
The PyTorch\* implementation is publicly available in [this GitHub* repository](https://github.com/dbolya/yolact).
The YOLACT++ model is not supported, because it uses deformable convolutional layers that cannot be represented in ONNX* format.

## Create a Patch File <a name="patch-file"></a>

Before converting the model, create a patch file for the repository.
The patch modifies the framework code so that evaluating the model also exports it to ONNX* format:

1. Go to a writable directory and create a `YOLACT_onnx_export.patch` file.
2. Copy the following diff code to the file:
```git
From 76deb67d4f09f29feda1a633358caa18335d9e9f Mon Sep 17 00:00:00 2001
From: "OpenVINO" <openvino@intel.com>
Date: Fri, 12 Mar 2021 00:27:35 +0300
Subject: [PATCH] Add export to ONNX

---
 eval.py                | 5 ++++-
 utils/augmentations.py | 7 +++++--
 yolact.py              | 29 +++++++++++++++++++----------
 3 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/eval.py b/eval.py
index 547bc0a..bde0680 100644
--- a/eval.py
+++ b/eval.py
@@ -593,9 +593,12 @@ def badhash(x):
     return x

 def evalimage(net:Yolact, path:str, save_path:str=None):
-    frame = torch.from_numpy(cv2.imread(path)).cuda().float()
+    frame = torch.from_numpy(cv2.imread(path)).float()
+    if torch.cuda.is_available():
+        frame = frame.cuda()
     batch = FastBaseTransform()(frame.unsqueeze(0))
     preds = net(batch)
+    torch.onnx.export(net, batch, "yolact.onnx", opset_version=11)

     img_numpy = prep_display(preds, frame, None, None, undo_transform=False)

diff --git a/utils/augmentations.py b/utils/augmentations.py
index cc7a73a..2420603 100644
--- a/utils/augmentations.py
+++ b/utils/augmentations.py
@@ -623,8 +623,11 @@ class FastBaseTransform(torch.nn.Module):
     def __init__(self):
         super().__init__()

-        self.mean = torch.Tensor(MEANS).float().cuda()[None, :, None, None]
-        self.std = torch.Tensor( STD ).float().cuda()[None, :, None, None]
+        self.mean = torch.Tensor(MEANS).float()[None, :, None, None]
+        self.std = torch.Tensor( STD ).float()[None, :, None, None]
+        if torch.cuda.is_available():
+            self.mean.cuda()
+            self.std.cuda()
         self.transform = cfg.backbone.transform

     def forward(self, img):
diff --git a/yolact.py b/yolact.py
index d83703b..f8c787c 100644
--- a/yolact.py
+++ b/yolact.py
@@ -17,19 +17,22 @@ import torch.backends.cudnn as cudnn
 from utils import timer
 from utils.functions import MovingAverage, make_net

-# This is required for Pytorch 1.0.1 on Windows to initialize Cuda on some driver versions.
-# See the bug report here: https://github.com/pytorch/pytorch/issues/17108
-torch.cuda.current_device()
-
-# As of March 10, 2019, Pytorch DataParallel still doesn't support JIT Script Modules
-use_jit = torch.cuda.device_count() <= 1
-if not use_jit:
-    print('Multiple GPUs detected! Turning off JIT.')
+use_jit = False

 ScriptModuleWrapper = torch.jit.ScriptModule if use_jit else nn.Module
 script_method_wrapper = torch.jit.script_method if use_jit else lambda fn, _rcn=None: fn


+def decode(loc, priors):
+    variances = [0.1, 0.2]
+    boxes = torch.cat((priors[:, :2] + loc[:, :, :2] * variances[0] * priors[:, 2:], priors[:, 2:] * torch.exp(loc[:, :, 2:] * variances[1])), 2)
+
+    boxes_result1 = boxes[:, :, :2] - boxes[:, :, 2:] / 2
+    boxes_result2 = boxes[:, :, 2:] + boxes[:, :, :2]
+    boxes_result = torch.cat((boxes_result1, boxes_result2), 2)
+
+    return boxes_result
+

 class Concat(nn.Module):
     def __init__(self, nets, extra_params):
@@ -476,7 +479,10 @@ class Yolact(nn.Module):

     def load_weights(self, path):
         """ Loads weights from a compressed save file. """
-        state_dict = torch.load(path)
+        if torch.cuda.is_available():
+            state_dict = torch.load(path)
+        else:
+            state_dict = torch.load(path, map_location=torch.device('cpu'))

         # For backward compatability, remove these (the new variable is called layers)
         for key in list(state_dict.keys()):
@@ -673,8 +679,11 @@ class Yolact(nn.Module):
         else:
             pred_outs['conf'] = F.softmax(pred_outs['conf'], -1)

-        return self.detect(pred_outs, self)
+        pred_outs['boxes'] = decode(pred_outs['loc'], pred_outs['priors']) # decode output boxes

+        pred_outs.pop('priors') # remove unused in postprocessing layers
+        pred_outs.pop('loc') # remove unused in postprocessing layers
+        return pred_outs


--
```
3. Save and close the file.

## Convert YOLACT Model to the Intermediate Representation (IR) Format

**Step 1**. Clone the GitHub repository and check out the commit:

1. Clone the YOLACT repository:
```sh
git clone https://github.com/dbolya/yolact
```
2. Check out the necessary commit:
```sh
git checkout 57b8f2d95e62e2e649b382f516ab41f949b57239
```

**Step 2**. Download a pretrained model, for example `yolact_base_54_800000.pth`.

**Step 3**. Export the model to ONNX* format.

1. Apply the `YOLACT_onnx_export.patch` patch to the repository. Refer to the <a href="#patch-file">Create a Patch File</a> instructions if you do not have it:
```sh
git apply /path/to/patch/YOLACT_onnx_export.patch
```

2. Evaluate the YOLACT model to export it to ONNX* format:

```sh
python3 eval.py \
    --trained_model=/path/to/yolact_base_54_800000.pth \
    --score_threshold=0.3 \
    --top_k=10 \
    --image=/path/to/image.jpg
```

3. As a result, you should get the `yolact.onnx` file.

**Step 4**. Convert the model to the IR:

```sh
python path/to/model_optimizer/mo.py --input_model /path/to/yolact.onnx
```

**Step 5**. Embed input preprocessing into the IR:

To get a performance gain, embed the mean/scale normalization and the RGB->BGR conversion into the IR so that the OpenVINO application does not have to perform them, using the following Model Optimizer (MO) options:

* If the backbone of the model is Resnet50-FPN or Resnet101-FPN, use the following MO command line:

```sh
python path/to/model_optimizer/mo.py \
    --input_model /path/to/yolact.onnx \
    --reverse_input_channels \
    --mean_values "[123.68, 116.78, 103.94]" \
    --scale_values "[58.40, 57.12, 57.38]"
```

* If the backbone of the model is Darknet53-FPN, use the following MO command line:

```sh
python path/to/model_optimizer/mo.py \
    --input_model /path/to/yolact.onnx \
    --reverse_input_channels \
    --scale 255
```
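
For reference, here is a rough Python sketch (an illustration only, not part of the official flow) of the preprocessing that these options embed into the IR for the Resnet-FPN backbones; `image` is assumed to be a BGR `uint8` array as returned by OpenCV:

```python
# Illustrative only: what --reverse_input_channels, --mean_values, and
# --scale_values embed into the IR for the Resnet-FPN backbones.
import numpy as np

def preprocess(image: np.ndarray) -> np.ndarray:
    mean = np.array([123.68, 116.78, 103.94], dtype=np.float32)
    scale = np.array([58.40, 57.12, 57.38], dtype=np.float32)
    rgb = image[..., ::-1].astype(np.float32)  # BGR -> RGB channel reversal
    return (rgb - mean) / scale                # per-channel normalization
```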

@@ -103,7 +103,7 @@ Models with `keep_aspect_ratio_resizer` were trained to recognize object in real
Inference Engine comes with a number of samples that use Object Detection API models including:

* [Object Detection for SSD Sample](../../../../../inference-engine/samples/object_detection_sample_ssd/README.md) --- for RFCN, SSD and Faster R-CNNs
* [Mask R-CNN Sample for TensorFlow* Object Detection API Models](@ref omz_demos_mask_rcnn_demo_cpp) --- for Mask R-CNNs

There are a number of important notes about feeding input images to the samples:

@@ -1044,4 +1044,4 @@ The Mask R-CNN models are cut at the end with the sub-graph replacer `ObjectDete

```SecondStageBoxPredictor_1/Conv_3/BiasAdd|SecondStageBoxPredictor_1/Conv_1/BiasAdd```

One of these two nodes produces output mask tensors. The child nodes of these nodes are related to post-processing which is implemented in the [Mask R-CNN demo](@ref omz_demos_mask_rcnn_demo_cpp) and should be cut off.

@@ -110,7 +110,7 @@ where:

> **NOTE:** The color channel order (RGB or BGR) of an input data should match the channel order of the model training dataset. If they are different, perform the `RGB<->BGR` conversion specifying the command-line parameter: `--reverse_input_channels`. Otherwise, inference results may be incorrect. For more information about the parameter, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](../Converting_Model_General.md).

OpenVINO™ toolkit provides a demo that uses the YOLOv3 model. For more information, refer to [Object Detection C++ Demo](@ref omz_demos_object_detection_demo_cpp).

## Convert YOLOv1 and YOLOv2 Models to the IR

@@ -1,53 +1,53 @@

# Model Optimizer Extensibility {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Customize_Model_Optimizer}

- <a href="#model-representation-in-memory">Model Representation in Memory</a>
- <a href="#model-conversion-pipeline">Model Conversion Pipeline</a>
  - <a href="#model-loading">Model Loading</a>
  - <a href="#operations-attributes-extracting">Operations Attributes Extracting</a>
  - <a href="#front-phase">Front Phase</a>
  - <a href="#partial-inference">Partial Inference</a>
  - <a href="#middle-phase">Middle Phase</a>
    - <a href="#layout-change">NHWC to NCHW Layout Change</a>
  - <a href="#back-phase">Back Phase</a>
  - <a href="#ir-emitting">Intermediate Representation Emitting</a>
- <a href="#graph-ports-and-conneсtions">Graph Traversal and Modification Using <code>Port</code>s and <code>Connection</code>s</a>
  - <a href="#intro-ports">Ports</a>
  - <a href="#intro-connections">Connections</a>
- <a href="#extensions">Model Optimizer Extensions</a>
  - <a href="#extension-operation">Model Optimizer Operation</a>
  - <a href="#operation-extractor">Operation Extractor</a>
  - <a href="#graph-transformations">Graph Transformation Extensions</a>
    - <a href="#front-phase-transformations">Front Phase Transformations</a>
      - <a href="#pattern-defined-front-phase-transformations">Pattern-Defined Front Phase Transformations</a>
      - <a href="#specific-operation-front-phase-transformations">Specific Operation Front Phase Transformations</a>
      - <a href="#generic-front-phase-transformations">Generic Front Phase Transformations</a>
      - <a href="#node-name-pattern-front-phase-transformations">Node Name Pattern Front Phase Transformations</a>
      - <a href="#start-end-points-front-phase-transformations">Front Phase Transformations Using Start and End Points</a>
      - <a href="#generic-transformations-config-front-phase-transformations">Generic Front Phase Transformations Enabled with Transformations Configuration File</a>
    - <a href="#middle-phase-transformations">Middle Phase Transformations</a>
      - <a href="#pattern-defined-middle-phase-transformations">Pattern-Defined Middle Phase Transformations</a>
      - <a href="#generic-middle-phase-transformations">Generic Middle Phase Transformations</a>
    - <a href="#back-phase-transformations">Back Phase Transformations</a>
      - <a href="#pattern-defined-back-phase-transformations">Pattern-Defined Back Phase Transformations</a>
      - <a href="#generic-back-phase-transformations">Generic Back Phase Transformations</a>
- <a href="#see-also">See Also</a>

<a name="model-optimizer-extensibility"></a>Model Optimizer extensibility mechanism enables support of new operations and custom transformations to generate the
optimized intermediate representation (IR) as described in the
[Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™](../../IR_and_opsets.md). This
mechanism is a core part of the Model Optimizer. The Model Optimizer itself uses it under the hood, being a huge set of examples on how to add custom logic to support your model.

There are several cases when the customization is needed:

* A model contains operation(s) not known for the Model Optimizer, but these operation(s) could be expressed as a
combination of supported operations. In this case, a custom transformation should be implemented to replace unsupported
operation(s) with supported ones.
* A model contains sub-graph of operations that can be replaced with a smaller number of operations to get the better
performance. This example corresponds to so-called fusing transformations. For example, replace a sub-graph performing
the following calculation \f$x / (1.0 + e^{-(beta * x)})\f$ with a single operation of type
[Swish](../../../ops/activation/Swish_4.md).
* A model contains a custom framework operation (the operation that is not a part of an official operation set of the
framework) that was developed using the framework extensibility mechanism. In this case, the Model Optimizer should know
how to handle the operation and generate a corresponding section in an IR for it.

It is necessary to figure out how the Model Optimizer represents a model in memory and converts it to an IR before
@@ -61,14 +61,13 @@ The model can be represented as a directed graph where nodes are operations and
producer operation (node) to a consumer operation (node).

Model Optimizer uses a Python class `mo.graph.graph.Graph` instance to represent the computation graph in memory during
the model conversion. This class is inherited from the `networkx.MultiDiGraph` class of the standard `networkx` Python
library and provides many convenient methods to traverse and modify the graph. Refer to the `mo/graph/graph.py` file for
the examples.

Model Optimizer keeps all necessary information about the operation in node attributes. Model Optimizer uses the `mo.graph.graph.Node` class defined in the `mo/graph/graph.py` file, which is a wrapper on top of a `networkx` node attributes
dictionary, and provides many convenient methods to work with the node. For example, the node `my_node` attribute with a
name `'my_attr'` can be retrieved from the node with the following code `my_node.my_attr`, which is equivalent to obtaining
attribute with name `'my_attr'` in the `graph.node['my_node']` dictionary. Refer to the `mo/graph/graph.py` for the
class implementation details.
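
A minimal sketch of this equivalence, assuming a `Graph` instance `graph` that already contains a node with id `'my_node'` carrying a `'my_attr'` attribute:

```python
# Hypothetical sketch of the two equivalent ways to read a node attribute.
from mo.graph.graph import Node

node = Node(graph, 'my_node')                 # wrapper over the networkx node
via_wrapper = node.my_attr                    # convenient attribute-style access
via_dict = graph.node['my_node']['my_attr']   # raw networkx attributes dictionary
assert via_wrapper == via_dict
```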

@@ -76,12 +75,12 @@ An operation may have several inputs and outputs. For example, operation [Split]
two inputs: data to split and axis to split along, and variable number of outputs depending on a value of attribute
`num_splits`. Each input data to the operation is passed to a specific operation **input port**. An operation produces
an output data from an **output port**. Input and output ports are numbered from 0 independently. Model Optimizer uses
classes `mo.graph.port.Port` and `mo.graph.connection.Connection`, which are useful abstractions to perform graph
modifications like nodes connecting/re-connecting and graph traversing. These classes are widely used in the Model
Optimizer code so it is easy to find a lot of usage examples.

There is no dedicated class corresponding to an edge, so low-level graph manipulation is needed to get access to
edge attributes if needed. Meanwhile, most manipulations with nodes connections should be done with help of the
`mo.graph.connection.Connection` and `mo.graph.port.Port` classes. Thus, low-level graph manipulation is error prone and
is strongly not recommended.

@@ -94,19 +93,19 @@ A model conversion pipeline can be represented with the following diagram:

![Model Conversion pipeline](../../../img/MO_conversion_pipeline.png)

Each conversion step is reviewed in detail below.

### Model Loading <a name="model-loading"></a>
Model Optimizer gets a trained model file as an input. The model loader component of the Model Optimizer reads a model file
using Python bindings provided with the framework and builds an in-memory representation of a computation graph. There
is a separate loader for each supported framework. These loaders are implemented in the
`extensions/load/<FRAMEWORK>/loader.py` files of the Model Optimizer.

> **NOTE**: Model Optimizer uses a special parser for Caffe\* models built on top of the `caffe.proto` file. In case of a
> model loading failure, the Model Optimizer throws an error and requests to prepare the parser that can read the model.
> For more information on how to prepare the custom Caffe\* parser, refer to the [Model Optimizer Frequently Asked Questions #1](../Model_Optimizer_FAQ.md).

The result of a model loading step is a `Graph` object, which can be depicted like in the following example:

![Graph After Load](../../../img/MO_graph_after_loader.png)

@@ -114,16 +113,16 @@ Model Optimizer loader saves an operation instance framework description (usuall
attribute usually with a name `pb` for each operation of an input model. It is important that this is a
**framework-specific** description of an operation. This means that an operation, for example,
[Convolution](../../../ops/convolution/Convolution_1.md) may be represented differently in, for example, Caffe\* and
TensorFlow\* frameworks but performs the same calculations from a mathematical point of view.

In the example above, the "Operation 2" has one input and two outputs. The tensor produced from the output port 0 is
consumed with the "Operation 5" (the input port 0) and "Operation 3" (the input port 1). The tensor produced from the
output port 1 is consumed with the "Operation 4" (the input port 0).

Each edge has two attributes `in` and `out` containing the input port number of the consumer node and the output port
number of the producer node. These attributes describe the fact that nodes are operations consuming some input tensors
and producing some output tensors. But nodes themselves are "black boxes" from the Model Optimizer perspective because
they do not contain required information about the operation they perform.

### Operations Attributes Extracting <a name="operations-attributes-extracting"></a>
The next step is to parse framework-dependent operation representation saved in a node attribute and update the node
@@ -159,22 +158,22 @@ document). Detailed list of common node attributes and their values is provided
[Model Optimizer Operation](#extension-operation).

### Front Phase <a name="front-phase"></a>
For legacy reasons, you must specify shapes for all not fully-defined inputs of the model. In contrast, other
machine learning frameworks like TensorFlow\* let you create a model with undefined or partially defined input shapes.
As an example, an undefined dimension is marked with an integer value `-1` in a TensorFlow\* model or has some string name
in an ONNX\* model.

During the front phase, the Model Optimizer knows shape of the model inputs and constants only and does not know shapes
(and even ranks) of the intermediate tensors. But information about shapes may not be needed to implement particular
transformation. For example, the transformation `extensions/front/TopKNormalize.py` removes an attribute `k` from a
`TopK` node and adds an input constant with the value `k`. The transformation is needed to convert a `TopK` operation
that comes from frameworks where a number of output elements is defined as an attribute of the operation to the
OpenVINO™ [TopK](../../../ops/sort/TopK_3.md) operation semantic, which requires this value to be a separate input.

It is important to mention that sometimes it seems like a transformation cannot be implemented during the front phase
because the actual values of inputs or shapes are needed. But in fact shapes or values manipulations can be implemented
using operations that are added to the graph. Consider the
`extensions/front/onnx/flattenONNX_to_reshape.py` transformation, which replaces an ONNX\* operation
[Flatten](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Flatten) with a sub-graph of operations performing
the following (for the case when `axis` is not equal to 0 and 1):

@@ -185,14 +184,14 @@ the following (for the case when `axis` is not equal to 0 and 1):
[Reshape](../../../ops/shape/Reshape_1.md) specification for an explanation of this value).
4. Use the concatenated value as the second input to the `Reshape` operation (see the sketch below).
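
In plain terms, the sub-graph computes the target shape like this (a hypothetical sketch of the arithmetic, not the transformation source; `axis > 1` is assumed):

```python
# What the Flatten-to-Reshape sub-graph effectively computes (sketch).
import numpy as np

def flatten_target_shape(input_shape, axis):
    # The first `axis` dimensions are multiplied together; -1 lets Reshape
    # infer the remaining flattened dimension at runtime.
    return [int(np.prod(input_shape[:axis])), -1]

print(flatten_target_shape([2, 3, 4, 5], axis=2))  # [6, -1]
```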

It is highly recommended that you write shape-agnostic transformations to avoid model reshape-ability issues. Refer to
[Using Shape Inference](../../../IE_DG/ShapeInference.md) for more information related to the reshaping of a model.

More information on how to develop front phase transformations and dedicated API description is provided in the
[Front Phase Transformations](#front-phase-transformations).

### Partial Inference <a name="partial-inference"></a>
Model Optimizer performs a partial inference of a model during model conversion. This procedure includes output shapes
calculation of all operations in a model and constant folding (value calculation for constant sub-graphs). The constant
folding is needed for the shape inference because in some cases evaluation of constant sub-graph is needed to calculate
output shapes. For example, the output shape for the [Reshape](../../../ops/shape/Reshape_1.md) operation may be
@@ -213,22 +212,22 @@ files.
> [Const](../../../ops/infrastructure/Constant_1.md) operations defined with respective operation attributes.

Model Optimizer inserts "data" nodes to the computation graph before starting the partial inference phase. The data node
corresponds to the specific tensor produced with the operation. Each data node contains two attributes: `shape`,
containing the shape of the tensor, and `value`, which may contain the actual value of the tensor. The value for a `value`
attribute is equal to `None` if this tensor value cannot be calculated. This happens in two cases: when a tensor value
depends on the values passed to the [Parameter](../../../ops/infrastructure/Parameter_1.md) operation of a model or the
Model Optimizer does not have value propagation implementation for the operation.

Before running partial inference, the graph can be depicted like in the following example:

![Graph Before Partial Inference](../../../img/MO_graph_before_partial_inference.png)

The difference in a graph structure with a graph during the front phase is not only in the data nodes, but also in the
edge attributes. Note that an `out` attribute is specified for edges **from operation** nodes only, while an `in`
attribute is specified for edges **from data** nodes only. This corresponds to the fact that a tensor (data node) is
produced from a specific output port of an operation and is consumed with a specific input port of an operation. Also,
a unique data node is created for each output port of an operation and may be used as an input node for several
operation nodes, like the data node "data2_0", which is consumed with the input port 1 of the operation "Operation 3" and
input port 0 of the operation "Operation 5".

Now consider how the Model Optimizer performs shape and value propagation. Model Optimizer performs graph nodes
@@ -236,13 +235,13 @@ topological sort. An error message is thrown if a graph contains a cycle. Then s
each node in the graph according to the topological order. Each node of the graph must have an attribute called `infer`
with a shape inference function, which is a function with one parameter – an instance of the `Node` class. The `infer`
attribute is usually set in the operation extractor or when a node is added in some transformation using the Model
Optimizer operation class inherited from the `mo.ops.Op` class. Refer to the [Model Optimizer Operation](#extension-operation)
and [Operation Extractor](#operation-extractor) for more information on how to specify a shape inference function.
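
Schematically (a sketch of the idea, not the actual Model Optimizer source), the propagation looks like this:

```python
# Sketch: shape/value propagation over a topologically sorted graph.
for node in graph.pseudo_topological_sort():  # fails if the graph has a cycle
    if node.has_valid('infer'):
        node.infer(node)  # updates 'shape' (and optionally 'value')
                          # of the node's output data nodes
```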

A shape inference function should calculate an operation (node) output shape(s) based on input shape(s) and operation
(node) attribute(s) and update `shape` and optionally `value` attributes of the corresponding data node(s). A simplified
example of the shape infer function for the [Reshape](../../../ops/shape/Reshape_1.md) operation (the full version is
available in the `mo/ops/reshape.py` file):

```py
@staticmethod
def infer(node):
    # ... (the body of the simplified example is not shown in this fragment;
    # refer to mo/ops/reshape.py for the complete implementation)
```
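
For illustration, here is a minimal, hypothetical shape inference function for an element-wise operation, written against the `Port` API described in the [Ports](#intro-ports) section (the output shape simply mirrors the input shape):

```py
@staticmethod
def infer(node):
    # Hypothetical element-wise example: output shape equals input shape.
    input_shape = node.in_port(0).data.get_shape()
    node.out_port(0).data.set_shape(input_shape)
```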

@@ -273,12 +272,12 @@ them.

> **NOTE**: There is a legacy approach to read data node attribute like `input_shape = op_node.in_node(0).shape` and
> modify data nodes attributes like `op_node.out_node(0).shape = some_value`. This approach is still used in the Model
> Optimizer code but is not recommended. Instead, use the approach described in the [Ports](#intro-ports).
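
A short sketch contrasting the two styles (a hypothetical node `op_node`; the port-based calls are the recommended ones):

```python
# Legacy style (still found in the code base, not recommended):
input_shape = op_node.in_node(0).shape
op_node.out_node(0).shape = input_shape

# Recommended port-based style:
input_shape = op_node.in_port(0).data.get_shape()
op_node.out_port(0).data.set_shape(input_shape)
```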

### Middle Phase <a name="middle-phase"></a>
The middle phase starts after partial inference. At this phase, a graph contains data nodes and output shapes of all
operations in the graph have been calculated. Any transformation implemented at this stage must update the `shape`
attribute for all newly added operations. It is highly recommended to use the API described in the
[Graph Traversal and Modification Using `Port`s and `Connection`s](#graph-ports-and-conneсtions) because modification of
a graph using this API causes automatic re-inference of affected nodes as well as necessary data nodes creation.

@@ -290,10 +289,10 @@ There are several middle transformations responsible for changing model layout f
are triggered by default for TensorFlow\* models only because it is the only framework with Convolution operations in
NHWC layout.

> **NOTE**: If a TensorFlow\* model is in NCHW layout, you should specify the `--disable_nhwc_to_nchw` command line
> parameter to disable these transformations.

The layout change is a complex problem and a detailed explanation of it is out of the scope of this document. A very brief
explanation of this process is provided below:

1. Model Optimizer changes output shapes of most of operations producing 4D and 5D (four dimensional and five
@@ -313,11 +312,11 @@ Refer to the source code of these transformations for more details on how the la
### Back Phase <a name="back-phase"></a>
The back phase starts after the layout change to NCHW. This phase contains mostly the following transformations:

1. Transformations that should work with a graph in the NCHW layout and thus cannot be implemented in the middle
phase.
2. Transformations that replace nodes corresponding to internal Model Optimizer operations with nodes corresponding to the
[opset](@ref openvino_docs_ops_opset) operations.
3. Transformations that normalize operations inputs according to the specification.
4. Final optimization transformations.

A graph structure during the back phase is the same as during the middle phase. There is no difference in writing middle
@@ -330,30 +329,30 @@ More information on how to develop back transformations and dedicated API descri
The last phase of a model conversion is the Intermediate Representation emitting. Model Optimizer performs the following
steps:

1. Iterates over all operation nodes in the graph and checks that all nodes have the `type` attribute set. This attribute
defines the operation type and is used in the Inference Engine to instantiate proper operation from the
[opset](@ref openvino_docs_ops_opset) specified in the `version` attribute of the node. If some node does not have
attribute `type` or its value is equal to `None`, the Model Optimizer exits with an error.
2. Performs type inference of graph operations similar to the shape inference. Inferred data types are saved to port
attributes in the IR.
3. Performs topological sort of the graph and changes `id` attribute of all operation nodes to be sequential integer
values starting from 0.
4. Saves all Constants values to the `.bin` file. Constants with the same value are shared among different operations.
5. Generates an `.xml` file defining a graph structure. The information about operation inputs and outputs are prepared
uniformly for all operations regardless of their type. A list of attributes to be saved to the `.xml` file is defined
with the `backend_attrs()` or `supported_attrs()` of the `Op` class used for a graph node instantiation. For more
information on how the operation attributes are saved to XML, refer to the function `prepare_emit_ir()` in
the `mo/pipeline/common.py` file and [Model Optimizer Operation](#extension-operation).

## Graph Traversal and Modification Using `Port`s and `Connection`s <a name="graph-ports-and-conneсtions"></a>
There are three APIs for a graph traversal and transformation used in the Model Optimizer:
1. The API provided with the `networkx` Python library for the `networkx.MultiDiGraph` class, which is the base class for
the `mo.graph.graph.Graph` object. Refer to the [Model Representation in Memory](#model-representation-in-memory) for
more details. For example, the following methods belong to this API level: `graph.add_edges_from([list])`,
`graph.add_node(x, attrs)`, `graph.out_edges(node_id)` etc., where `graph` is an instance of the `networkx.MultiDiGraph`
class. **This is the lowest-level API and its usage should be avoided in the Model Optimizer transformations**.
2. The API built around the `mo.graph.graph.Node` class. The `Node` class is the primary class to work with graph nodes
and their attributes. **There are some `Node` class methods not recommended for use and some functions defined in the
`mo.graph.graph` have been deprecated**. Examples of such methods and functions are:
`node.in_node(y)`, `node.out_node(x)`, `node.get_outputs()`, `node.insert_node_after(n1, y)`, `create_edge(n1, n2)` etc.
Refer to the `mo/graph/graph.py` for more details.
@@ -364,24 +363,24 @@ Refer to the `mo/graph/graph.py` for more details.
transformations and operations implementation**.

The main benefit of using Model Optimizer Graph API is that it hides some internal implementation details (the fact that
the graph contains data nodes), provides API to perform safe and predictable graph manipulations, and adds operation
semantic to the graph. This is achieved with introduction of concepts of ports and connections. This chapter is
dedicated to the Model Optimizer Graph API and does not cover the other two non-recommended APIs.

### Ports <a name="intro-ports"></a>
An operation semantic describes how many inputs and outputs the operation has. For example, operations
[Parameter](../../../ops/infrastructure/Parameter_1.md) and [Const](../../../ops/infrastructure/Constant_1.md) have no
inputs and have one output, operation [ReLU](../../../ops/activation/ReLU_1.md) has one input and one output, operation
[Split](../../../ops/movement/Split_1.md) has 2 inputs and variable number of outputs depending on the value of the
attribute `num_splits`.

Each operation node in the graph (an instance of the `Node` class) has 0 or more input and output ports (instances of
the `mo.graph.port.Port` class). The `Port` object has several attributes:
* `node` - the instance of the `Node` object the port belongs to.
* `idx` - the port number. Input and output ports are numbered independently starting from `0`. Thus, operation
[ReLU](../../../ops/activation/ReLU_1.md) has one input port (with index `0`) and one output port (with index `0`).
* `type` - the type of the port. Could be equal to either `"in"` or `"out"`.
* `data` - the object that should be used to get attributes of the corresponding data node. This object has methods
`get_shape()` / `set_shape()` and `get_value()` / `set_value()` to get/set shape/value of the corresponding data node.
For example, `in_port.data.get_shape()` returns an input shape of a tensor connected to input port `in_port`
(`in_port.type == 'in'`), `out_port.data.get_value()` returns a value of a tensor produced from output port `out_port`
@@ -398,42 +397,42 @@ input/output port.

Attributes `in_ports_count` and `out_ports_count` of the `Op` class instance define the default number of input and output
ports to be created for the `Node`. However, additional input/output ports can be added using the
`add_input_port()` and `add_output_port()` methods. Ports can also be removed using the `delete_input_port()` and
`delete_output_port()` methods.

The `Port` class is just an abstraction that works with edges incoming/outgoing to/from a specific `Node` instance. For
example, output port with `idx = 1` corresponds to the outgoing edge of a node with an attribute `out = 1`, the input
port with `idx = 2` corresponds to the incoming edge of a node with an attribute `in = 2`.

Consider the example of a graph part with 4 operation nodes "Op1", "Op2", "Op3", and "Op4" and a number of data nodes
depicted with light green boxes.

![](../../../img/MO_ports_example_1.png)

Operation nodes have input ports (yellow squares) and output ports (light purple squares). An input port may not be
connected. For example, the input port 2 of node "Op1" does not have an incoming edge, while an output port always has an
associated data node (after the partial inference when the data nodes are added to the graph), which may have no
consumers.

Ports can be used to traverse a graph. The method `get_source()` of an input port returns an output port producing the
tensor consumed by the input port. It is important that the method works the same during front, middle and back phases of a
model conversion even though the graph structure changes (there are no data nodes in the graph during the front phase).

Let's assume that there are 4 instances of `Node` object `op1, op2, op3`, and `op4` corresponding to nodes "Op1", "Op2",
"Op3", and "Op4", respectively. The result of `op2.in_port(0).get_source()` and `op4.in_port(1).get_source()` is the
same object `op1.out_port(1)` of type `Port`.

The method `get_destination()` of an output port returns the input port of the node consuming this tensor. If there are
multiple consumers of this tensor, an error is raised. The method `get_destinations()` of an output port returns a
list of input ports consuming the tensor.

The method `disconnect()` removes a node incoming edge corresponding to the specific input port. The method removes
several edges if it is applied during the front phase for a node output port connected with multiple nodes.

The method `port.connect(another_port)` connects output port `port` and input port `another_port`. The method handles
situations when the graph contains data nodes (middle and back phases) and not only creates an edge between two nodes
but also automatically creates a data node or reuses an existing data node. If the method is used during the front phase and
data nodes do not exist, the method creates an edge and properly sets the `in` and `out` edge attributes.

For example, applying the following two methods to the graph above will result in the graph depicted below:

@@ -454,16 +453,16 @@ and source output port producing data. So each port is connected with one or mor
Model Optimizer uses the `mo.graph.connection.Connection` class to represent a connection.

There is only one method `get_connection()` of the `Port` class to get the instance of the corresponding `Connection`
object. If the port is not connected, the returned value is `None`.

For example, the `op3.out_port(0).get_connection()` method returns a `Connection` object encapsulating edges from node
"Op3" to data node "data_3_0" and two edges from data node "data_3_0" to two ports of the node "Op4".

The `Connection` class provides methods to get source and destination(s) ports the connection corresponds to:
* `connection.get_source()` - returns an output `Port` object producing the tensor.
* `connection.get_destinations()` - returns a list of input `Port`s consuming the data.
* `connection.get_destination()` - returns a single input `Port` consuming the data. If there are multiple consumers,
an exception is raised.

The `Connection` class provides methods to modify a graph by changing a source or destination(s) of a connection. For
example, the function call `op3.out_port(0).get_connection().set_source(op1.out_port(0))` changes source port of edges
@@ -472,22 +471,22 @@ below:

![](../../../img/MO_connection_example_1.png)

Another example is the method `connection.set_destination(dest_port)`. It disconnects `dest_port` and all input ports to which
the connection is currently connected and connects the connection source port to `dest_port`.

Note that connections work seamlessly during front, middle, and back phases and hide the fact that the graph structure is
different.
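
As a small sketch of these APIs working together (hypothetical nodes `op1` and `op3`, as in the figures above):

```python
# Sketch: inspect and re-route a connection (names are illustrative).
connection = op3.out_port(0).get_connection()
source = connection.get_source()           # output Port producing the tensor
consumers = connection.get_destinations()  # input Ports consuming the tensor

# Re-route all consumers of op3's output to op1's output port 0:
connection.set_source(op1.out_port(0))
```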
> **NOTE**: Refer to the `Connection` class implementation in the `mo/graph/connection.py` for a full list of available
|
||||
methods.
|
||||
|
||||
## Model Optimizer Extensions <a name="extensions"></a>
|
||||
Model Optimizer extensions allow to inject some logic to the model conversion pipeline without changing the Model
|
||||
Model Optimizer extensions enable you to inject some logic to the model conversion pipeline without changing the Model
|
||||
Optimizer core code. There are three types of the Model Optimizer extensions:
|
||||
|
||||
1. Model Optimizer operation.
|
||||
2. A framework operation extractor.
|
||||
3. A model transformation which can be executed during front, middle or back phase of the model conversion.
|
||||
3. A model transformation, which can be executed during front, middle or back phase of the model conversion.
|
||||
|
||||
An extension is just a plain text file with a Python code. The file should contain a class (or classes) inherited from
|
||||
one of extension base classes. Extension files should be saved to a directory with the following structure:
|
||||
@@ -509,11 +508,11 @@ Model Optimizer uses the same layout internally to keep built-in extensions. The
|
||||
> **NOTE**: The name of a root directory with extensions should not be equal to "extensions" because it will result in a
|
||||
> name collision with the built-in Model Optimizer extensions.
|
||||
|
||||
> **NOTE**: Model Optimizer itself is built using these extensions so there are huge number of examples on how to use
|
||||
> **NOTE**: Model Optimizer itself is built using these extensions so there is a huge number of examples on how to use
|
||||
> them in the Model Optimizer code.
|
||||
|
||||
### Model Optimizer Operation <a name="extension-operation"></a>
Model Optimizer defines a class `mo.ops.Op` (`Op` will be used later in the document for brevity), which is a base class
for an operation used in the Model Optimizer. The instance of the `Op` class serves several purposes:

1. Stores the operation attributes.
@@ -525,7 +524,7 @@ graph.

It is important to mention that there is no connection between the instance of the `Op` class and the `Node` object
created from it. The `Op` class is just an attributes container describing the operation. Model Optimizer uses the `Op`
class during a model conversion to create a node of the graph with attributes copied from the `Op` class instance. Graph
manipulations are performed with graph `Node`s and their attributes and do not involve `Op`s.

There are a number of common attributes used in the operations. Here is the list of these attributes with descriptions.
@@ -536,19 +535,19 @@ There are a number of common attributes used in the operations. Here is the list
* `type` — type of the operation according to the [opset specification](@ref openvino_docs_ops_opset). For the internal
Model Optimizer operations, this attribute should be set to `None`. The model conversion fails if an operation with
`type` equal to `None` comes to the IR emitting phase. **Mandatory**.
* `version` — the operation set (opset) name the operation belongs to. If not specified, the Model Optimizer sets it
equal to `experimental`. Refer to [nGraph Basic Concepts](@ref openvino_docs_nGraph_DG_basic_concepts) for more
information about operation sets. **Mandatory**.
* `op` — Model Optimizer type of the operation. In many cases, the value of `type` is equal to the value of `op`. But
when the Model Optimizer cannot instantiate the opset operation during model loading, it creates an instance of an internal
operation, and the attribute `op` is used as a type of this internal operation. Later in the pipeline, the node created
from an internal operation will be replaced during the front, middle or back phase with node(s) created from the opset.
* `infer` — the attribute defines a function calculating output tensor(s) shape and optionally value(s); a minimal
sketch is shown after this list. The attribute may be set to `None` for internal Model Optimizer operations used during
the front phase only. Refer to the [Partial Inference](#partial-inference) for more information about the shape
inference function.
* `type_infer` — the attribute defines a function calculating output tensor(s) data type. If the attribute is not
defined, the default function is used. The function checks if the node attribute `data_type` is set and then
propagates this type to the output tensor from the port 0; otherwise, it propagates the data type of the tensor coming
into the input port 0 to the output tensor from the port 0.
* `in_ports_count` — default number of input ports to be created for the operation. Additional ports can be created or
redundant ports can be removed using dedicated `Node` class API methods.
@@ -556,7 +555,7 @@ redundant ports can be removed using dedicated `Node` class API methods.
redundant ports can be removed using dedicated `Node` class API methods.
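
As referenced in the `infer` attribute description above, here is a minimal, hedged sketch of a shape inference
function for a hypothetical unary elementwise operation: the output repeats the input shape, and the value is computed
eagerly when the input value is already known.

```py
import numpy as np

def elementwise_abs_infer(node):
    # propagate the input shape to the output tensor of port 0
    input_shape = node.in_port(0).data.get_shape()
    node.out_port(0).data.set_shape(input_shape)

    # optional constant folding: compute the output value when the input value is known
    input_value = node.in_port(0).data.get_value()
    if input_value is not None:
        node.out_port(0).data.set_value(np.abs(input_value))
```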

Here is an example of the Model Optimizer class for the operation [SoftMax](../../../ops/activation/SoftMax_1.md) from
the `mo/ops/softmax.py` file with comments in the code.

```py
class Softmax(Op):
@@ -564,7 +563,7 @@ class Softmax(Op):
    # "Op.get_op_class_by_name()" static method
    op = 'SoftMax'

    # the operation works as an extractor by default. This is a legacy behavior not recommended for use currently,
    # thus the "enabled" class attribute is set to False. The recommended approach is to use dedicated extractor extension
    enabled = False

@@ -611,14 +610,14 @@ example from the `mo/ops/pooling.py` file:
```

The `backend_attrs()` function returns a list of records. A record can be of one of the following formats (a combined
sketch follows the list):
1. A string defining the attribute to be saved to the IR. If the value of the attribute is `None`, the attribute is
not saved. Examples of this case are `rounding_type` and `auto_pad`.
2. A tuple where the first element is a string defining the name of the attribute as it will appear in the IR and the
second element is a function to produce the value for this attribute. The function gets an instance of the `Node` as the
only parameter and returns a string with the value to be saved to the IR. Examples of this case are `strides`, `kernel`,
`pads_begin` and `pads_end`.
3. A tuple where the first element is a string defining the name of the attribute as it will appear in the IR and the
second element is the name of the `Node` attribute to get the value from. Examples of this case are `pool-method` and
`exclude-pad`.
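
A hedged sketch (the attribute names are hypothetical) showing all three record formats in a single `backend_attrs()`
implementation:

```py
def backend_attrs(self):
    return [
        # format 1: a plain attribute name; skipped if the node attribute value is None
        'auto_pad',
        # format 2: (IR attribute name, function producing the value from the Node)
        ('strides', lambda node: ','.join(map(str, node.stride))),
        # format 3: (IR attribute name, Node attribute name to copy the value from)
        ('pool-method', 'pool_method'),
    ]
```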

### Operation Extractor <a name="extension-extractor"></a>
@@ -626,7 +625,7 @@ Model Optimizer runs specific extractor for each operation in the model during t
[operations-attributes-extracting](#operations-attributes-extracting) for more information about this process.

There are several types of Model Optimizer extractor extensions:
1. The generic one, which is described in this section.
2. The special extractor for Caffe\* models with Python layers. This kind of extractor is described in the
[Extending the Model Optimizer with Caffe* Python Layers](Extending_Model_Optimizer_with_Caffe_Python_Layers.md).
3. The special extractor for MXNet\* models with custom operations. This kind of extractor is described in the
@@ -634,9 +633,9 @@ There are several types of Model Optimizer extractor extensions:
4. The special extractor and fallback to Caffe\* for shape inference is described in the
[Legacy Mode for Caffe* Custom Layers](Legacy_Mode_for_Caffe_Custom_Layers.md).

This chapter is focused on option #1, which provides a generic mechanism for the operation extractor applicable to
all frameworks. Model Optimizer provides the `mo.front.extractor.FrontExtractorOp` class as a base class to implement the
extractor. It has a class method `extract`, which gets a single parameter `Node` corresponding to the graph node to
extract data from. The operation description in the original framework format is stored in the attribute `pb` of the
node. The extractor's goal is to parse this attribute and save the necessary attributes to the corresponding node of the
graph. Consider the extractor for the TensorFlow\* operation `Const` (refer to the file
@@ -716,7 +715,7 @@ used to parse operation attributes encoded with a framework-specific representat
A common practice is to use the `update_node_stat()` method of the dedicated `Op` class to update the node attributes;
a hedged sketch is shown after this list. This method does the following:

1. Sets values for common attributes like `op`, `type`, `infer`, `in_ports_count`, `out_ports_count`, `version` to
values specific to the dedicated operation (`Const` operation in this case).
2. Uses methods `supported_attrs()` and `backend_attrs()` defined in the `Op` class to update the specific node attribute
`IE`. The IR emitter uses the value stored in the `IE` attribute to pre-process attribute values and save them to IR.
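
A hedged sketch of a complete extractor built this way. The operation name `MyOp`, the attribute `alpha`, and the
protobuf field access are hypothetical; the sketch also assumes a dedicated `MyOp` operation class (defined as shown in
the previous section), and the real parsing logic depends on the framework:

```py
from mo.front.extractor import FrontExtractorOp

class MyOpFrontExtractor(FrontExtractorOp):
    op = 'MyOp'      # framework operation type that triggers this extractor (hypothetical)
    enabled = True

    @classmethod
    def extract(cls, node):
        # parse the framework-specific description stored in "node.pb"
        attrs = {'alpha': node.pb.attr['alpha'].f}  # hypothetical TensorFlow-style field access
        # let the dedicated Op class fill in the common attributes (op, type, infer, ports, ...)
        MyOp.update_node_stat(node, attrs)
        return cls.enabled
```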
@@ -728,11 +727,11 @@ these attributes are parsed from the particular instance of the operation.

### Graph Transformation Extensions <a name="graph-transformations"></a>
Model Optimizer provides various base classes to implement [Front Phase Transformations](#front-phase-transformations),
[Middle Phase Transformations](#middle-phase-transformations), and [Back Phase Transformations](#back-phase-transformations).
All classes have the following common class attributes and methods (a skeleton combining them is shown after this list):
1. Attribute `enabled` specifies whether the transformation is enabled or not. The value can be changed during runtime
to enable or disable execution of the transformation during a model conversion. Default value is `True`.
2. Attribute `id` specifies a unique transformation string identifier. This transformation identifier can be used to
enable (disable) the transformation by setting the environment variable `MO_ENABLED_TRANSFORMS` (`MO_DISABLED_TRANSFORMS`)
with a comma-separated list of `id`s. The environment variables override the value of the `enabled` attribute of the
transformation. Instead of using the `id` attribute value, you can add the fully defined class name to `MO_ENABLED_TRANSFORMS`
@@ -747,21 +746,21 @@ graph cleanup removes nodes of the graph not reachable from the model inputs. De
input(s) were changed during the transformation, or the developer can set this attribute manually in the transformation for
the specific nodes. Default value is `False`.
5. Attribute `graph_condition` specifies a list of functions with one parameter, the `Graph` object. The transformation
is executed if and only if all functions return `True`. If the attribute is not set, no check is performed.
1. Method `run_before()` returns a list of transformation classes which this transformation should be executed before.
2. Method `run_after()` returns a list of transformation classes which this transformation should be executed after.
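
The skeleton referenced above is a hedged sketch only; the framework check in `graph_condition` and the clean-up policy
are hypothetical examples:

```py
from mo.front.common.replacement import FrontReplacementPattern

class MyTransformation(FrontReplacementPattern):
    enabled = True          # executed by default; can be overridden via MO_DISABLED_TRANSFORMS
    force_clean_up = True   # request a graph clean-up after the transformation finishes
    graph_condition = [lambda graph: graph.graph['fw'] == 'tf']  # hypothetical: run for TF models only

    def run_before(self):
        # list transformation classes that must run after this one; empty here for self-containment
        return []

    def run_after(self):
        return []

    def find_and_replace_pattern(self, graph):
        pass  # the actual graph manipulation goes here
```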

> **NOTE**: Some of the transformation types have specific class attributes and methods, which are explained in the
> corresponding sections of this document.

Model Optimizer builds a graph of dependencies between registered transformations and executes them in the topological
order. To execute the transformation during a proper model conversion phase, the Model Optimizer defines several
anchor transformations that do nothing. All transformations are ordered with respect to these anchor transformations.
The diagram below shows anchor transformations, some of the built-in transformations, and dependencies between them:

![Transformations Graph](../../../img/MO_transformations_graph.png)

User-defined transformations are executed after the corresponding `Start` and before the corresponding `Finish` anchor
transformations by default (if `run_before()` and `run_after()` methods have not been overridden).

> **NOTE**: The `PreMiddleStart` and `PostMiddleStart` anchors were introduced for historical reasons to refactor
@@ -801,10 +800,10 @@ works differently:
The sub-graph pattern is defined in the `pattern()` function. This function should return a dictionary with two keys,
`nodes` and `edges` (a minimal sketch of such a function follows the list):
* The value for the `nodes` key is a list of tuples with two elements.
  * The first element is an alias name for a node that will be used to define edges between nodes and in the
  transformation function.
  * The second element is a dictionary with attributes. The key is a name of an attribute that should exist in the
  node. The value for the attribute can be some specific value to match or a function that gets a single parameter (the
  attribute value from the node). The function should return the result of comparing the attribute with a dedicated
  value.
* The value for the `edges` key is a list of tuples with two or three elements.
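
A hedged sketch of a `pattern()` function matching a hypothetical sub-graph: a `Mul` node (whose `can_be_fused`
attribute must be `True`) feeding input port 0 of an `Add` node:

```py
def pattern(self):
    return dict(
        nodes=[
            ('mul', dict(op='Mul', can_be_fused=True)),
            ('add', dict(op='Add')),
        ],
        edges=[
            ('mul', 'add', {'in': 0}),  # output of "mul" goes to input port 0 of "add"
        ],
    )
```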
@@ -871,7 +870,7 @@ class MishFusion(FrontReplacementSubgraph):
This type of transformation is implemented using `mo.front.common.replacement.FrontReplacementOp` as a base class and
works the following way.
1. Developer defines an operation type to trigger the transformation.
2. Model Optimizer searches for all nodes in the graph with the attribute `op` equal to the specified value.
3. Model Optimizer executes a developer-defined function performing graph transformation for each instance of a matched
node. Developer can override different functions in the base transformation class, and the Model Optimizer works
differently:
@@ -921,7 +920,7 @@ class Pack(FrontReplacementOp):
```

##### Generic Front Phase Transformations <a name="generic-front-phase-transformations"></a>
Model Optimizer provides a mechanism to implement generic front phase transformations. This type of transformation is
implemented using `mo.front.common.replacement.FrontReplacementSubgraph` or
`mo.front.common.replacement.FrontReplacementPattern` as base classes. The only condition to execute the transformation
is to check that it is enabled. Then the Model Optimizer executes the method `find_and_replace_pattern(self, graph)` and
@@ -968,7 +967,7 @@ class SqueezeNormalize(FrontReplacementPattern):
                        'attribute'.format(squeeze_node.soft_get('name')))
```

Refer to `mo/front/common/replacement.py` for the implementation details on how these front phase transformations
work.

##### Node Name Pattern Front Phase Transformations <a name="node-name-pattern-front-phase-transformations"></a>
@@ -1104,10 +1103,10 @@ for more examples of this type of transformation.
##### Front Phase Transformations Using Start and End Points <a name="start-end-points-front-phase-transformations"></a>
This type of transformation is implemented using `mo.front.tf.replacement.FrontReplacementFromConfigFileSubGraph` as a
base class and works the following way.
1. Developer prepares a JSON configuration file that defines the sub-graph to match using two lists of node names:
"start" and "end" nodes.
2. Model Optimizer executes the developer-defined transformation **only** when a user specifies the path to the
configuration file using the command line parameter `--transformations_config`. Model Optimizer performs the following
steps to match the sub-graph:
   1. Starts a graph traversal from every start node following the direction of the graph edges. The search stops in an
   end node or in case of a node without consumers. All visited nodes are added to the matched sub-graph.
@@ -1115,9 +1114,9 @@ steps to match the sub-graph:
   "start" list. In this step, the edges are traversed in the opposite edge direction. All newly visited nodes are added
   to the matched sub-graph. This step is needed to add nodes required for calculating values of internal nodes of the
   matched sub-graph.
   3. Checks that all "end" nodes were reached from "start" nodes. If not, exits with an error.
   4. Checks that there are no [Parameter](../../../ops/infrastructure/Parameter_1.md) operations among the added nodes. If
   they exist, the sub-graph depends on the inputs of the model. Such a configuration is considered incorrect, so the
   Model Optimizer exits with an error.

This algorithm finds all nodes "between" start and end nodes and nodes needed for calculation of non-input nodes of the
@@ -1160,7 +1159,7 @@ The example of a JSON configuration file for a transformation with start and end
The format of the file is similar to the one provided as an example in the
[Node Name Pattern Front Phase Transformations](#node-name-pattern-front-phase-transformations). The difference is in
the value of the `match_kind` parameter, which should be equal to `points`, and the format of the `instances` parameter,
which should be a dictionary with two keys, `start_points` and `end_points`, defining the start and end node names
respectively; a minimal example of such a file follows.
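
The example below is a hedged sketch only; the transformation `id` and the node names are hypothetical placeholders:

```json
[
    {
        "id": "MyStartEndPointsTransformation",
        "match_kind": "points",
        "instances": {
            "start_points": ["node_A"],
            "end_points": ["node_B"]
        },
        "custom_attributes": {}
    }
]
```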

@@ -1168,7 +1167,7 @@ correspondingly.
> always equal to `true`.

> **NOTE**: This sub-graph match algorithm has a limitation that each start node must have only one input. Therefore, it
> is not possible to specify, for example, the [Convolution](../../../ops/convolution/Convolution_1.md) node as input
> because it has two inputs: a data tensor and a tensor with weights.

For other examples of transformations with points, please refer to the
@@ -1259,7 +1258,7 @@ graph structure changes.
Refer to `extensions/middle/L2NormToNorm.py` for an example of a pattern-defined middle transformation.

##### Generic Middle Phase Transformations <a name="generic-middle-phase-transformations"></a>
Model Optimizer provides a mechanism to implement generic middle phase transformations. This type of transformation is
implemented using `mo.middle.replacement.MiddleReplacementPattern` as a base class and works similarly to the
[Generic Front Phase Transformations](#generic-front-phase-transformations). The only difference is that the
transformation entry function name is `find_and_replace_pattern(self, graph: Graph)`.
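
A hedged sketch of such a middle transformation; the clean-up it performs (bypassing hypothetical `Identity` nodes) is
only an illustration:

```py
from mo.graph.graph import Graph
from mo.middle.replacement import MiddleReplacementPattern

class BypassIdentity(MiddleReplacementPattern):
    enabled = True

    def find_and_replace_pattern(self, graph: Graph):
        for node in graph.get_op_nodes(op='Identity'):
            # reconnect the consumers of "node" directly to its producer, then drop the node
            source = node.in_port(0).get_source()
            node.out_port(0).get_connection().set_source(source)
            graph.remove_node(node.id)
```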
@@ -1290,7 +1289,7 @@ implemented using `mo.back.replacement.BackReplacementPattern` as a base class a

Refer to `extensions/back/GatherNormalizer.py` for an example of such a transformation.

## See Also <a name="see-also"></a>
* [Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™](../../IR_and_opsets.md)
* [Converting a Model to Intermediate Representation (IR)](../convert_model/Converting_Model.md)
* [nGraph Basic Concepts](@ref openvino_docs_nGraph_DG_basic_concepts)

@@ -1,261 +1,12 @@
# Performance Benchmarks {#openvino_docs_performance_benchmarks}

The [Intel® Distribution of OpenVINO™ toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html) helps accelerate deep learning inference across a variety of Intel® processors and accelerators.

The benchmarks below demonstrate high performance gains on several public neural networks on multiple Intel® CPUs, GPUs and VPUs covering a broad performance range. Use this data to help you decide which hardware is best for your applications and solutions, or to plan your AI workload on the Intel computing already included in your solutions.

Use the links below to review the benchmarking results for each alternative:

* [Intel® Distribution of OpenVINO™ toolkit Benchmark Results](performance_benchmarks_openvino.md)
* [OpenVINO™ Model Server Benchmark Results](performance_benchmarks_ovms.md)

Performance for a particular application can also be evaluated virtually using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/).

@@ -39,8 +39,10 @@ The image size used in the inference depends on the network being benchmarked. T
| [squeezenet1.1-CF](https://github.com/opencv/open_model_zoo/tree/master/models/public/squeezenet1.1) | SqueezeNet_v1.1_ILSVRC-2012_Caffe | classification | 227x227 |
| [ssd300-CF](https://github.com/opencv/open_model_zoo/tree/master/models/public/ssd300) | SSD (VGG-16)_VOC-2007_Caffe | object detection | 300x300 |
| [yolo_v3-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v3-tf) | TF Keras YOLO v3 Modelset | object detection | 300x300 |
| [yolo_v4-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/yolo-v4-tf) | Yolo-V4 TF | object detection | 608x608 |
| [ssd_mobilenet_v1_coco-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssd_mobilenet_v1_coco) | ssd_mobilenet_v1_coco | object detection | 300x300 |
| [ssdlite_mobilenet_v2-TF](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/ssdlite_mobilenet_v2) | ssd_mobilenet_v2 | object detection | 300x300 |
| [unet-camvid-onnx-0001](https://github.com/openvinotoolkit/open_model_zoo/blob/master/models/intel/unet-camvid-onnx-0001/description/unet-camvid-onnx-0001.md) | U-Net | semantic segmentation | 368x480 |

#### 7. Where can I purchase the specific hardware used in the benchmarking?
Intel partners with various vendors all over the world. Visit the [Intel® AI: In Production Partners & Solutions Catalog](https://www.intel.com/content/www/us/en/internet-of-things/ai-in-production/partners-solutions-catalog.html) for a list of Equipment Makers and the [Supported Devices](../IE_DG/supported_plugins/Supported_Devices.md) documentation. You can also remotely test and run models before purchasing any hardware by using [Intel® DevCloud for the Edge](http://devcloud.intel.com/edge/).

272
docs/benchmarks/performance_benchmarks_openvino.md
Normal file
@@ -0,0 +1,272 @@
|
||||
# Intel® Distribution of OpenVINO™ toolkit Benchmark Results {#openvino_docs_performance_benchmarks_openvino}
|
||||
|
||||
This benchmark setup includes a single machine on which both the benchmark application and the OpenVINO™ installation reside.
|
||||
|
||||
The benchmark application loads the Inference Engine (SW) at run time and executes inferences on the specified hardware inference engine, (CPU, GPU or VPU). The benchmark application measures the time spent on actual inferencing (excluding any pre or post processing) and then reports on the inferences per second (or Frames Per Second). For more information on the benchmark application, please also refer to the entry 5 of the [FAQ section](performance_benchmarks_faq.md).
|
||||
|
||||
Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/).
|
||||
|
||||
Measuring inference performance involves many variables and is extremely use-case and application dependent. We use the below four parameters for measurements, which are key elements to consider for a successful deep learning inference application:
|
||||
|
||||
- **Throughput** - Measures the number of inferences delivered within a latency threshold. (for example, number of Frames Per Second - FPS). When deploying a system with deep learning inference, select the throughput that delivers the best trade-off between latency and power for the price and performance that meets your requirements.
|
||||
- **Value** - While throughput is important, what is more critical in edge AI deployments is the performance efficiency or performance-per-cost. Application performance in throughput per dollar of system cost is the best measure of value.
|
||||
- **Efficiency** - System power is a key consideration from the edge to the data center. When selecting deep learning solutions, power efficiency (throughput/watt) is a critical factor to consider. Intel designs provide excellent power efficiency for running deep learning workloads.
|
||||
- **Latency** - This measures the synchronous execution of inference requests and is reported in milliseconds. Each inference request (for example: preprocess, infer, postprocess) is allowed to complete before the next is started. This performance metric is relevant in usage scenarios where a single image input needs to be acted upon as soon as possible. An example would be the healthcare sector where medical personnel only request analysis of a single ultra sound scanning image or in real-time or near real-time applications for example an industrial robot's response to actions in its environment or obstacle avoidance for autonomous vehicles.
|
||||
|
||||
|
||||
\htmlonly
|
||||
<!-- these CDN links and scripts are required. Add them to the <head> of your website -->
|
||||
<link href="https://fonts.googleapis.com/css2?family=Roboto:wght@100;300;400;500;600;700;900&display=swap" rel="stylesheet" type="text/css">
|
||||
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css" type="text/css">
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js@2.9.3/dist/Chart.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chartjs-plugin-datalabels"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/chartjs-plugin-annotation/0.5.7/chartjs-plugin-annotation.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chartjs-plugin-barchart-background@1.3.0/build/Plugin.Barchart.Background.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chartjs-plugin-deferred@1"></script>
|
||||
<!-- download this file and place on your server (or include the styles inline) -->
|
||||
<link rel="stylesheet" href="ovgraphs.css" type="text/css">
|
||||
\endhtmlonly
|
||||
|
||||
|
||||
\htmlonly
|
||||
<script src="bert-large-uncased-whole-word-masking-squad-int8-0001-ov-2021-3-338-5.js" id="bert-large-uncased-whole-word-masking-squad-int8-0001-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="deeplabv3-tf-ov-2021-3-338-5.js" id="deeplabv3-tf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="densenet-121-tf-ov-2021-3-338-5.js" id="densenet-121-tf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="faster-rcnn-resnet50-coco-tf-ov-2021-3-338-5.js" id="faster-rcnn-resnet50-coco-tf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="googlenet-v1-tf-ov-2021-3-338-5.js" id="googlenet-v1-tf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="inception-v3-tf-ov-2021-3-338-5.js" id="inception-v3-tf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="mobilenet-ssd-cf-ov-2021-3-338-5.js" id="mobilenet-ssd-cf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="mobilenet-v1-1-0-224-tf-ov-2021-3-338-5.js" id="mobilenet-v1-1-0-224-tf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="mobilenet-v2-pytorch-ov-2021-3-338-5.js" id="mobilenet-v2-pytorch-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="resnet-18-pytorch-ov-2021-3-338-5.js" id="resnet-18-pytorch-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="resnet-50-tf-ov-2021-3-338-5.js" id="resnet-50-tf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
|
||||
\htmlonly
|
||||
<script src="se-resnext-50-cf-ov-2021-3-338-5.js" id="se-resnext-50-cf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="squeezenet1-1-cf-ov-2021-3-338-5.js" id="squeezenet1-1-cf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
|
||||
\htmlonly
|
||||
<script src="ssd300-cf-ov-2021-3-338-5.js" id="ssd300-cf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="yolo-v3-tf-ov-2021-3-338-5.js" id="yolo-v3-tf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="yolo-v4-tf-ov-2021-3-338-5.js" id="yolo-v4-tf-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="unet-camvid-onnx-0001-ov-2021-3-338-5.js" id="unet-camvid-onnx-0001-ov-2021-3-338-5"></script>
|
||||
\endhtmlonly
|
||||
|
||||
|
||||
## Platform Configurations
|
||||
|
||||
Intel® Distribution of OpenVINO™ toolkit performance benchmark numbers are based on release 2021.3.
|
||||
|
||||
Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at intel.com, or from the OEM or retailer. Performance results are based on testing as of March 15, 2021 and may not reflect all publicly available updates. See configuration disclosure for details. No product can be absolutely secure.
|
||||
|
||||
Performance varies by use, configuration and other factors. Learn more at [www.intel.com/PerformanceIndex](https://www.intel.com/PerformanceIndex).
|
||||
|
||||
Your costs and results may vary.
|
||||
|
||||
© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others.
|
||||
|
||||
Intel optimizations, for Intel compilers or other products, may not optimize to the same degree for non-Intel products.
|
||||
|
||||
Testing by Intel done on: see test date for each HW platform below.
|
||||
|
||||
**CPU Inference Engines**

| | Intel® Xeon® E-2124G | Intel® Xeon® W-1290P | Intel® Xeon® Silver 4216R |
| ------------------------------- | ---------------------- | --------------------------- | ---------------------------- |
| Motherboard | ASUS* WS C246 PRO | ASUS* WS W480-ACE | Intel® Server Board S2600STB |
| CPU | Intel® Xeon® E-2124G CPU @ 3.40GHz | Intel® Xeon® W-1290P CPU @ 3.70GHz | Intel® Xeon® Silver 4216R CPU @ 2.20GHz |
| Hyper Threading | OFF | ON | ON |
| Turbo Setting | ON | ON | ON |
| Memory | 2 x 16 GB DDR4 2666MHz | 4 x 16 GB DDR4 2666MHz | 12 x 32 GB DDR4 2666MHz |
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic |
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc. | Intel Corporation |
| BIOS Version | 0904 | 607 | SE5C620.86B.02.01.<br>0009.092820190230 |
| BIOS Release | April 12, 2019 | May 29, 2020 | September 28, 2019 |
| BIOS Settings | Select optimized default settings, <br>save & exit | Select optimized default settings, <br>save & exit | Select optimized default settings, <br>change power policy <br>to "performance", <br>save & exit |
| Batch size | 1 | 1 | 1 |
| Precision | INT8 | INT8 | INT8 |
| Number of concurrent inference requests | 4 | 5 | 32 |
| Test Date | March 15, 2021 | March 15, 2021 | March 15, 2021 |
| Power dissipation, TDP in Watt | [71](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html#tab-blade-1-0-1) | [125](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) | [125](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) |
| CPU Price on March 15, 2021, USD<br>Prices may vary | [213](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html) | [539](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) | [1,002](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html) |

**CPU Inference Engines (continued)**

| | Intel® Xeon® Gold 5218T | Intel® Xeon® Platinum 8270 | Intel® Xeon® Platinum 8380 |
| ------------------------------- | ---------------------------- | ---------------------------- | -----------------------------------------|
| Motherboard | Intel® Server Board S2600STB | Intel® Server Board S2600STB | Intel Corporation / WilsonCity |
| CPU | Intel® Xeon® Gold 5218T CPU @ 2.10GHz | Intel® Xeon® Platinum 8270 CPU @ 2.70GHz | Intel® Xeon® Platinum 8380 CPU @ 2.30GHz |
| Hyper Threading | ON | ON | ON |
| Turbo Setting | ON | ON | ON |
| Memory | 12 x 32 GB DDR4 2666MHz | 12 x 32 GB DDR4 2933MHz | 16 x 16 GB DDR4 3200MHz |
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic |
| BIOS Vendor | Intel Corporation | Intel Corporation | Intel Corporation |
| BIOS Version | SE5C620.86B.02.01.<br>0009.092820190230 | SE5C620.86B.02.01.<br>0009.092820190230 | WLYDCRB1.SYS.0020.<br>P86.2103050636 |
| BIOS Release | September 28, 2019 | September 28, 2019 | March 5, 2021 |
| BIOS Settings | Select optimized default settings, <br>change power policy to "performance", <br>save & exit | Select optimized default settings, <br>change power policy to "performance", <br>save & exit | Select optimized default settings, <br>change power policy to "performance", <br>save & exit |
| Batch size | 1 | 1 | 1 |
| Precision | INT8 | INT8 | INT8 |
| Number of concurrent inference requests | 32 | 52 | 80 |
| Test Date | March 15, 2021 | March 15, 2021 | March 22, 2021 |
| Power dissipation, TDP in Watt | [105](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | [205](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html#tab-blade-1-0-1) | [270](https://ark.intel.com/content/www/us/en/ark/products/212287/intel-xeon-platinum-8380-processor-60m-cache-2-30-ghz.html) |
| CPU Price, USD<br>Prices may vary | [1,349](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html) (on March 15, 2021) | [7,405](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html) (on March 15, 2021) | [8,099](https://ark.intel.com/content/www/us/en/ark/products/212287/intel-xeon-platinum-8380-processor-60m-cache-2-30-ghz.html) (on March 26, 2021) |

**CPU Inference Engines (continued)**

| | Intel® Core™ i7-8700T | Intel® Core™ i9-10920X | 11th Gen Intel® Core™ i7-1185G7 |
| -------------------- | ----------------------------------- |--------------------------------------| --------------------------------|
| Motherboard | GIGABYTE* Z370M DS3H-CF | ASUS* PRIME X299-A II | Intel Corporation<br>internal/Reference<br>Validation Platform |
| CPU | Intel® Core™ i7-8700T CPU @ 2.40GHz | Intel® Core™ i9-10920X CPU @ 3.50GHz | 11th Gen Intel® Core™ i7-1185G7 @ 3.00GHz |
| Hyper Threading | ON | ON | ON |
| Turbo Setting | ON | ON | ON |
| Memory | 4 x 16 GB DDR4 2400MHz | 4 x 16 GB DDR4 2666MHz | 2 x 8 GB DDR4 3200MHz |
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.8.0-05-generic |
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | Intel Corporation |
| BIOS Version | F11 | 505 | TGLSFWI1.R00.3425.<br>A00.2010162309 |
| BIOS Release | March 13, 2019 | December 17, 2019 | October 16, 2020 |
| BIOS Settings | Select optimized default settings, <br>set OS type to "other", <br>save & exit | Default Settings | Default Settings |
| Batch size | 1 | 1 | 1 |
| Precision | INT8 | INT8 | INT8 |
| Number of concurrent inference requests | 4 | 24 | 4 |
| Test Date | March 15, 2021 | March 15, 2021 | March 15, 2021 |
| Power dissipation, TDP in Watt | [35](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html#tab-blade-1-0-1) | [165](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [28](https://ark.intel.com/content/www/us/en/ark/products/208664/intel-core-i7-1185g7-processor-12m-cache-up-to-4-80-ghz-with-ipu.html#tab-blade-1-0-1) |
| CPU Price on March 15, 2021, USD<br>Prices may vary | [303](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html) | [700](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [426](https://ark.intel.com/content/www/us/en/ark/products/208664/intel-core-i7-1185g7-processor-12m-cache-up-to-4-80-ghz-with-ipu.html#tab-blade-1-0-0) |

**CPU Inference Engines (continued)**

| | Intel® Core™ i5-8500 | Intel® Core™ i5-10500TE |
| -------------------- | ---------------------------------- | ----------------------------------- |
| Motherboard | ASUS* PRIME Z370-A | GIGABYTE* Z490 AORUS PRO AX |
| CPU | Intel® Core™ i5-8500 CPU @ 3.00GHz | Intel® Core™ i5-10500TE CPU @ 2.30GHz |
| Hyper Threading | OFF | ON |
| Turbo Setting | ON | ON |
| Memory | 2 x 16 GB DDR4 2666MHz | 2 x 16 GB DDR4 2666MHz |
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic |
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* |
| BIOS Version | 2401 | F3 |
| BIOS Release | July 12, 2019 | March 25, 2020 |
| BIOS Settings | Select optimized default settings, <br>save & exit | Select optimized default settings, <br>set OS type to "other", <br>save & exit |
| Batch size | 1 | 1 |
| Precision | INT8 | INT8 |
| Number of concurrent inference requests | 3 | 4 |
| Test Date | March 15, 2021 | March 15, 2021 |
| Power dissipation, TDP in Watt | [65](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html#tab-blade-1-0-1) | [35](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) |
| CPU Price on March 15, 2021, USD<br>Prices may vary | [192](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html) | [195](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) |

**CPU Inference Engines (continued)**

| | Intel Atom® x5-E3940 | Intel Atom® x6425RE | Intel® Core™ i3-8100 |
| -------------------- | --------------------------------------|------------------------------- |----------------------------------- |
| Motherboard | | Intel Corporation /<br>ElkhartLake LPDDR4x T3 CRB | GIGABYTE* Z390 UD |
| CPU | Intel Atom® Processor E3940 @ 1.60GHz | Intel Atom® x6425RE<br>Processor @ 1.90GHz | Intel® Core™ i3-8100 CPU @ 3.60GHz |
| Hyper Threading | OFF | OFF | OFF |
| Turbo Setting | ON | ON | OFF |
| Memory | 1 x 8 GB DDR3 1600MHz | 2 x 4 GB DDR4 3200MHz | 4 x 8 GB DDR4 2400MHz |
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
| Kernel Version | 5.3.0-24-generic | 5.8.0-050800-generic | 5.3.0-24-generic |
| BIOS Vendor | American Megatrends Inc.* | Intel Corporation | American Megatrends Inc.* |
| BIOS Version | 5.12 | EHLSFWI1.R00.2463.<br>A03.2011200425 | F8 |
| BIOS Release | September 6, 2017 | November 22, 2020 | May 24, 2019 |
| BIOS Settings | Default settings | Default settings | Select optimized default settings, <br>set OS type to "other", <br>save & exit |
| Batch size | 1 | 1 | 1 |
| Precision | INT8 | INT8 | INT8 |
| Number of concurrent inference requests | 4 | 4 | 4 |
| Test Date | March 15, 2021 | March 15, 2021 | March 15, 2021 |
| Power dissipation, TDP in Watt | [9.5](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) | [12](https://ark.intel.com/content/www/us/en/ark/products/207899/intel-atom-x6425re-processor-1-5m-cache-1-90-ghz.html) | [65](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html#tab-blade-1-0-1) |
| CPU Price, USD<br>Prices may vary | [34](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) (on March 15, 2021) | [59](https://ark.intel.com/content/www/us/en/ark/products/207899/intel-atom-x6425re-processor-1-5m-cache-1-90-ghz.html) (on March 26, 2021) | [117](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html) (on March 15, 2021) |

**Accelerator Inference Engines**

| | Intel® Neural Compute Stick 2 | Intel® Vision Accelerator Design<br>with Intel® Movidius™ VPUs (Mustang-V100-MX8) |
| --------------------------------------- | ------------------------------------- | ------------------------------------- |
| VPU | 1 X Intel® Movidius™ Myriad™ X MA2485 | 8 X Intel® Movidius™ Myriad™ X MA2485 |
| Connection | USB 2.0/3.0 | PCIe X4 |
| Batch size | 1 | 1 |
| Precision | FP16 | FP16 |
| Number of concurrent inference requests | 4 | 32 |
| Power dissipation, TDP in Watt | 2.5 | [30](https://www.arrow.com/en/products/mustang-v100-mx8-r10/iei-technology?gclid=Cj0KCQiA5bz-BRD-ARIsABjT4ng1v1apmxz3BVCPA-tdIsOwbEjTtqnmp_rQJGMfJ6Q2xTq6ADtf9OYaAhMUEALw_wcB) |
| Price, USD<br>Prices may vary | [69](https://ark.intel.com/content/www/us/en/ark/products/140109/intel-neural-compute-stick-2.html) (from March 15, 2021) | [1180](https://www.arrow.com/en/products/mustang-v100-mx8-r10/iei-technology?gclid=Cj0KCQiA5bz-BRD-ARIsABjT4ng1v1apmxz3BVCPA-tdIsOwbEjTtqnmp_rQJGMfJ6Q2xTq6ADtf9OYaAhMUEALw_wcB) (from March 15, 2021) |
| Host Computer | Intel® Core™ i7 | Intel® Core™ i5 |
| Motherboard | ASUS* Z370-A II | Uzelinfo* / US-E1300 |
| CPU | Intel® Core™ i7-8700 CPU @ 3.20GHz | Intel® Core™ i5-6600 CPU @ 3.30GHz |
| Hyper Threading | ON | OFF |
| Turbo Setting | ON | ON |
| Memory | 4 x 16 GB DDR4 2666MHz | 2 x 16 GB DDR4 2400MHz |
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
| Kernel Version | 5.0.0-23-generic | 5.0.0-23-generic |
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* |
| BIOS Version | 411 | 5.12 |
| BIOS Release | September 21, 2018 | September 21, 2018 |
| Test Date | March 15, 2021 | March 15, 2021 |

Please follow this link for more detailed configuration descriptions: [Configuration Details](https://docs.openvinotoolkit.org/resources/benchmark_files/system_configurations_2021.3.html)
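
The TDP and price rows in the tables above are what throughput numbers are typically normalized by when comparing value and efficiency across platforms. A toy illustration follows; the throughput figure is an assumption for the sake of the arithmetic, not a number from this page, while the price and TDP are taken from the Intel® Xeon® E-2124G column.

```python
# Hypothetical normalization of a measured throughput by the price and TDP
# columns above (Intel® Xeon® E-2124G values).
price_usd = 213.0   # CPU price on March 15, 2021, USD
tdp_watt = 71.0     # power dissipation (TDP), W
fps = 100.0         # assumed measured throughput, frames per second

print(f"FPS per dollar: {fps / price_usd:.3f}")
print(f"FPS per watt:   {fps / tdp_watt:.3f}")
```
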
\htmlonly
<style>
.footer {
display: none;
}
</style>
<div class="opt-notice-wrapper">
<p class="opt-notice">
\endhtmlonly
Results may vary. For workloads and configurations visit: [www.intel.com/PerformanceIndex](https://www.intel.com/PerformanceIndex) and [Legal Information](../Legal_Information.md).
\htmlonly
</p>
</div>
\endhtmlonly

376 docs/benchmarks/performance_benchmarks_ovms.md Normal file
@@ -0,0 +1,376 @@
# OpenVINO™ Model Server Benchmark Results {#openvino_docs_performance_benchmarks_ovms}

OpenVINO™ Model Server is an open-source, production-grade inference platform that exposes a set of models via a convenient inference API over gRPC or HTTP/REST. It employs the Inference Engine libraries from the Intel® Distribution of OpenVINO™ toolkit to extend workloads across Intel® hardware including CPU, GPU and others.

![](../../_static/benchmarks_files/OVMS.png)

## Measurement Methodology

OpenVINO™ Model Server is measured in a multiple-client-single-server configuration using two hardware platforms connected by an Ethernet network. The network bandwidth depends on both the platforms and the models under investigation, and it is provisioned so that it is not a bottleneck for workload intensity. This connection is dedicated only to the performance measurements. The benchmark setup consists of four main parts:

![](../../_static/benchmarks_files/ovms_benchmark_setup.png)

* **OpenVINO™ Model Server** is launched as a docker container on the server platform, where it listens for (and answers) requests from clients. OpenVINO™ Model Server is run on the same machine as the OpenVINO™ toolkit benchmark application in the corresponding benchmarking. Models served by OpenVINO™ Model Server are located in a local file system mounted into the docker container. The OpenVINO™ Model Server instance communicates with other components via ports over a dedicated docker network.

* **Clients** run on a separate physical machine referred to as the client platform. Clients are implemented in the Python 3 programming language on top of the TensorFlow* API and work as parallel processes. Each client waits for a response from OpenVINO™ Model Server before sending the next request; the clients also verify the responses (see the client sketch after this list).

* **Load balancer** works on the client platform in a docker container. HAProxy is used for this purpose. Its main role is to count the requests forwarded from clients to OpenVINO™ Model Server, estimate their latency, and share this information through a Prometheus service. The load balancer is placed on the client side to simulate a real-life scenario in which the physical network affects the reported metrics.

* **Execution Controller** is launched on the client platform. It is responsible for synchronizing the whole measurement process, downloading metrics from the load balancer, and presenting the final report of the execution.
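
A minimal synchronous client matching the description above is sketched below. It is an illustration, not the benchmark's actual client code: it uses the TensorFlow Serving gRPC API that OpenVINO™ Model Server exposes, and the endpoint, model name, tensor name, and input shape are assumptions.

```python
# Minimal sketch of one benchmark client process (hypothetical names/shapes).
import grpc
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc

channel = grpc.insecure_channel("ovms-host:9000")            # assumed address
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

for _ in range(1000):
    request = predict_pb2.PredictRequest()
    request.model_spec.name = "resnet"                        # assumed model name
    batch = np.zeros((1, 3, 224, 224), dtype=np.float32)      # batch size 1, as in the tables
    request.inputs["data"].CopyFrom(tf.make_tensor_proto(batch))
    response = stub.Predict(request, timeout=10.0)            # blocks until the server answers
    assert response.outputs                                   # illustrative response verification
```
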
## 3D U-Net (FP32)
![](../../_static/benchmarks_files/ovms-3dunet.png)
## resnet-50-TF (INT8)
![](../../_static/benchmarks_files/ovms-resnet50-int8.png)
## resnet-50-TF (FP32)
![](../../_static/benchmarks_files/ovms-resnet50-fp32.png)
## bert-large-uncased-whole-word-masking-squad-int8-0001 (INT8)
![](../../_static/benchmarks_files/ovms-bertlarge-int8.png)

## bert-large-uncased-whole-word-masking-squad-0001 (FP32)
![](../../_static/benchmarks_files/ovms-bertlarge-fp32.png)

## Platform Configurations

OpenVINO™ Model Server performance benchmark numbers are based on release 2021.3. Performance results are based on testing as of March 15, 2021 and may not reflect all publicly available updates.

**Platform with Intel® Xeon® Gold 6252**

<table>
<tr><th></th><th><strong>Server Platform</strong></th><th><strong>Client Platform</strong></th></tr>
<tr><td><strong>Motherboard</strong></td><td>Intel® Server Board S2600WF H48104-872</td><td>Inspur YZMB-00882-104 NF5280M5</td></tr>
<tr><td><strong>Memory</strong></td><td>Hynix 16 x 16GB @ 2666 MT/s DDR4</td><td>Samsung 16 x 16GB @ 2666 MT/s DDR4</td></tr>
<tr><td><strong>CPU</strong></td><td>Intel® Xeon® Gold 6252 CPU @ 2.10GHz</td><td>Intel® Xeon® Platinum 8260M CPU @ 2.40GHz</td></tr>
<tr><td><strong>Selected CPU Flags</strong></td><td>Hyper Threading, Turbo Boost, DL Boost</td><td>Hyper Threading, Turbo Boost, DL Boost</td></tr>
<tr><td><strong>CPU Thermal Design Power</strong></td><td>150 W</td><td>162 W</td></tr>
<tr><td><strong>Operating System</strong></td><td>Ubuntu 20.04.2 LTS</td><td>Ubuntu 20.04.2 LTS</td></tr>
<tr><td><strong>Kernel Version</strong></td><td>5.4.0-65-generic</td><td>5.4.0-54-generic</td></tr>
<tr><td><strong>BIOS Vendor</strong></td><td>Intel® Corporation</td><td>American Megatrends Inc.</td></tr>
<tr><td><strong>BIOS Version and Release Date</strong></td><td>SE5C620.86B.02.01, date: 03/26/2020</td><td>4.1.16, date: 06/23/2020</td></tr>
<tr><td><strong>Docker Version</strong></td><td>20.10.3</td><td>20.10.3</td></tr>
<tr><td><strong>Network Speed</strong></td><td colspan="2" align="center">40 Gb/s</td></tr>
</table>

**Platform with Intel® Core™ i9-10920X**

<table>
<tr><th></th><th><strong>Server Platform</strong></th><th><strong>Client Platform</strong></th></tr>
<tr><td><strong>Motherboard</strong></td><td>ASUSTeK COMPUTER INC. PRIME X299-A II</td><td>ASUSTeK COMPUTER INC. PRIME Z370-P</td></tr>
<tr><td><strong>Memory</strong></td><td>Corsair 4 x 16GB @ 2666 MT/s DDR4</td><td>Corsair 4 x 16GB @ 2133 MT/s DDR4</td></tr>
<tr><td><strong>CPU</strong></td><td>Intel® Core™ i9-10920X CPU @ 3.50GHz</td><td>Intel® Core™ i7-8700T CPU @ 2.40GHz</td></tr>
<tr><td><strong>Selected CPU Flags</strong></td><td>Hyper Threading, Turbo Boost, DL Boost</td><td>Hyper Threading, Turbo Boost</td></tr>
<tr><td><strong>CPU Thermal Design Power</strong></td><td>165 W</td><td>35 W</td></tr>
<tr><td><strong>Operating System</strong></td><td>Ubuntu 20.04.1 LTS</td><td>Ubuntu 20.04.1 LTS</td></tr>
<tr><td><strong>Kernel Version</strong></td><td>5.4.0-52-generic</td><td>5.4.0-56-generic</td></tr>
<tr><td><strong>BIOS Vendor</strong></td><td>American Megatrends Inc.</td><td>American Megatrends Inc.</td></tr>
<tr><td><strong>BIOS Version and Release Date</strong></td><td>0603, date: 03/05/2020</td><td>2401, date: 07/15/2019</td></tr>
<tr><td><strong>Docker Version</strong></td><td>19.03.13</td><td>19.03.14</td></tr>
<tr><td><strong>Network Speed</strong></td><td colspan="2" align="center">10 Gb/s</td></tr>
</table>

**Platform with Intel® Core™ i7-8700T**

<table>
<tr><th></th><th><strong>Server Platform</strong></th><th><strong>Client Platform</strong></th></tr>
<tr><td><strong>Motherboard</strong></td><td>ASUSTeK COMPUTER INC. PRIME Z370-P</td><td>ASUSTeK COMPUTER INC. PRIME X299-A II</td></tr>
<tr><td><strong>Memory</strong></td><td>Corsair 4 x 16GB @ 2133 MT/s DDR4</td><td>Corsair 4 x 16GB @ 2666 MT/s DDR4</td></tr>
<tr><td><strong>CPU</strong></td><td>Intel® Core™ i7-8700T CPU @ 2.40GHz</td><td>Intel® Core™ i9-10920X CPU @ 3.50GHz</td></tr>
<tr><td><strong>Selected CPU Flags</strong></td><td>Hyper Threading, Turbo Boost</td><td>Hyper Threading, Turbo Boost, DL Boost</td></tr>
<tr><td><strong>CPU Thermal Design Power</strong></td><td>35 W</td><td>165 W</td></tr>
<tr><td><strong>Operating System</strong></td><td>Ubuntu 20.04.1 LTS</td><td>Ubuntu 20.04.1 LTS</td></tr>
<tr><td><strong>Kernel Version</strong></td><td>5.4.0-56-generic</td><td>5.4.0-52-generic</td></tr>
<tr><td><strong>BIOS Vendor</strong></td><td>American Megatrends Inc.</td><td>American Megatrends Inc.</td></tr>
<tr><td><strong>BIOS Version and Release Date</strong></td><td>2401, date: 07/15/2019</td><td>0603, date: 03/05/2020</td></tr>
<tr><td><strong>Docker Version</strong></td><td>19.03.14</td><td>19.03.13</td></tr>
<tr><td><strong>Network Speed</strong></td><td colspan="2" align="center">10 Gb/s</td></tr>
</table>

**Platform with Intel® Core™ i5-8500**

<table>
<tr><th></th><th><strong>Server Platform</strong></th><th><strong>Client Platform</strong></th></tr>
<tr><td><strong>Motherboard</strong></td><td>ASUSTeK COMPUTER INC. PRIME Z370-A</td><td>Gigabyte Technology Co., Ltd. Z390 UD</td></tr>
<tr><td><strong>Memory</strong></td><td>Corsair 2 x 16GB @ 2133 MT/s DDR4</td><td>029E 4 x 8GB @ 2400 MT/s DDR4</td></tr>
<tr><td><strong>CPU</strong></td><td>Intel® Core™ i5-8500 CPU @ 3.00GHz</td><td>Intel® Core™ i3-8100 CPU @ 3.60GHz</td></tr>
<tr><td><strong>Selected CPU Flags</strong></td><td>Turbo Boost</td><td>-</td></tr>
<tr><td><strong>CPU Thermal Design Power</strong></td><td>65 W</td><td>65 W</td></tr>
<tr><td><strong>Operating System</strong></td><td>Ubuntu 20.04.1 LTS</td><td>Ubuntu 20.04.1 LTS</td></tr>
<tr><td><strong>Kernel Version</strong></td><td>5.4.0-52-generic</td><td>5.4.0-52-generic</td></tr>
<tr><td><strong>BIOS Vendor</strong></td><td>American Megatrends Inc.</td><td>American Megatrends Inc.</td></tr>
<tr><td><strong>BIOS Version and Release Date</strong></td><td>2401, date: 07/12/2019</td><td>F10j, date: 09/16/2020</td></tr>
<tr><td><strong>Docker Version</strong></td><td>19.03.13</td><td>20.10.0</td></tr>
<tr><td><strong>Network Speed</strong></td><td colspan="2" align="center">40 Gb/s</td></tr>
</table>

**Platform with Intel® Core™ i3-8100**
<table>
<tr><th></th><th><strong>Server Platform</strong></th><th><strong>Client Platform</strong></th></tr>
<tr><td><strong>Motherboard</strong></td><td>Gigabyte Technology Co., Ltd. Z390 UD</td><td>ASUSTeK COMPUTER INC. PRIME Z370-A</td></tr>
<tr><td><strong>Memory</strong></td><td>029E 4 x 8GB @ 2400 MT/s DDR4</td><td>Corsair 2 x 16GB @ 2133 MT/s DDR4</td></tr>
<tr><td><strong>CPU</strong></td><td>Intel® Core™ i3-8100 CPU @ 3.60GHz</td><td>Intel® Core™ i5-8500 CPU @ 3.00GHz</td></tr>
<tr><td><strong>Selected CPU Flags</strong></td><td>-</td><td>Turbo Boost</td></tr>
<tr><td><strong>CPU Thermal Design Power</strong></td><td>65 W</td><td>65 W</td></tr>
<tr><td><strong>Operating System</strong></td><td>Ubuntu 20.04.1 LTS</td><td>Ubuntu 20.04.1 LTS</td></tr>
<tr><td><strong>Kernel Version</strong></td><td>5.4.0-52-generic</td><td>5.4.0-52-generic</td></tr>
<tr><td><strong>BIOS Vendor</strong></td><td>American Megatrends Inc.</td><td>American Megatrends Inc.</td></tr>
<tr><td><strong>BIOS Version and Release Date</strong></td><td>F10j, date: 09/16/2020</td><td>2401, date: 07/12/2019</td></tr>
<tr><td><strong>Docker Version</strong></td><td>20.10.0</td><td>19.03.13</td></tr>
<tr><td><strong>Network Speed</strong></td><td colspan="2" align="center">40 Gb/s</td></tr>
</table>

\htmlonly
<style>
.footer {
display: none;
}
</style>
<div class="opt-notice-wrapper">
<p class="opt-notice">
\endhtmlonly
Results may vary. For workloads and configurations visit: [www.intel.com/PerformanceIndex](https://www.intel.com/PerformanceIndex) and [Legal Information](../Legal_Information.md).
\htmlonly
</p>
</div>
\endhtmlonly

@@ -7,9 +7,9 @@ The table below illustrates the speed-up factor for the performance gain by swit
<th></th>
<th></th>
<th>Intel® Core™ <br>i7-8700T</th>
<th>Intel® Xeon® <br>Gold <br>5218T</th>
<th>Intel® Xeon® <br>Platinum <br>8270</th>
<th>Intel® Core™ <br>i7-1185G7</th>
<th>Intel® Xeon® <br>W-1290P</th>
<th>Intel® Xeon® <br>Platinum <br>8270</th>
</tr>
<tr align="left">
<th>OpenVINO <br>benchmark <br>model name</th>
@@ -20,161 +20,177 @@ The table below illustrates the speed-up factor for the performance gain by swit
<td>bert-large-<br>uncased-whole-word-<br>masking-squad-0001</td><td>SQuAD</td><td>1.6</td><td>2.7</td><td>2.0</td><td>2.6</td><td>3.0</td><td>1.6</td><td>2.3</td></tr>
<tr><td>brain-tumor-<br>segmentation-<br>0001-MXNET</td><td>BraTS</td><td>1.5</td><td>1.6</td><td>1.9</td><td>1.7</td><td>1.8</td><td>1.7</td></tr>
<tr><td>deeplabv3-TF</td><td>VOC 2012<br>Segmentation</td><td>1.5</td><td>2.4</td><td>2.8</td><td>2.1</td><td>3.1</td><td>3.1</td><td>3.0</td></tr>
<tr><td>densenet-121-TF</td><td>ImageNet</td><td>1.6</td><td>3.2</td><td>3.2</td><td>3.2</td><td>1.8</td><td>3.5</td><td>1.9</td><td>3.8</td></tr>
<tr><td>facenet-<br>20180408-<br>102900-TF</td><td>LFW</td><td>2.0</td><td>3.6</td><td>3.5</td><td>3.4</td><td>2.2</td><td>3.7</td></tr>
<tr><td>faster_rcnn_<br>resnet50_coco-TF</td><td>MS COCO</td><td>1.7</td><td>3.4</td><td>3.4</td><td>3.4</td><td>1.9</td><td>3.8</td><td>2.0</td><td>3.5</td></tr>
<tr><td>googlenet-v1-TF</td><td>ImageNet</td><td>1.8</td><td>3.6</td><td>3.7</td><td>3.5</td><td>2.0</td><td>3.9</td></tr>
<tr><td>inception-v3-TF</td><td>ImageNet</td><td>1.8</td><td>1.9</td><td>3.8</td><td>2.0</td><td>4.0</td><td>3.5</td></tr>
<tr><td>mobilenet-<br>ssd-CF</td><td>VOC2012</td><td>1.5</td><td>1.7</td><td>3.1</td><td>1.8</td><td>3.6</td><td>3.1</td></tr>
<tr><td>mobilenet-v1-1.0-<br>224-TF</td><td>ImageNet</td><td>1.5</td><td>3.2</td><td>4.1</td><td>1.7</td><td>3.1</td><td>1.8</td><td>4.1</td></tr>
<tr><td>mobilenet-v2-1.0-<br>224-TF</td><td>ImageNet</td><td>1.3</td><td>2.7</td><td>4.3</td><td>2.5</td><td>1.5</td><td>2.4</td><td>1.8</td><td>3.9</td></tr>
<tr><td>mobilenet-v2-<br>pytorch</td><td>ImageNet</td><td>1.4</td><td>2.8</td><td>4.6</td><td>1.6</td><td>2.4</td><td>1.9</td><td>3.9</td></tr>
<tr><td>resnet-18-<br>pytorch</td><td>ImageNet</td><td>1.9</td><td>3.7</td><td>3.8</td><td>3.6</td><td>2.1</td><td>4.2</td></tr>
<tr><td>resnet-50-<br>pytorch</td><td>ImageNet</td><td>1.8</td><td>3.6</td><td>1.9</td><td>3.7</td><td>2.0</td><td>3.9</td><td>3.4</td></tr>
<tr><td>resnet-50-<br>TF</td><td>ImageNet</td><td>1.8</td><td>1.9</td><td>3.6</td><td>2.0</td><td>3.9</td><td>3.4</td></tr>
<tr><td>squeezenet1.1-<br>CF</td><td>ImageNet</td><td>1.6</td><td>2.9</td><td>3.4</td><td>1.7</td><td>3.2</td><td>1.8</td><td>3.4</td></tr>
<tr><td>ssd_mobilenet_<br>v1_coco-tf</td><td>VOC2012</td><td>1.6</td><td>3.1</td><td>3.7</td><td>1.7</td><td>3.0</td><td>1.9</td><td>3.6</td></tr>
<tr><td>ssd300-CF</td><td>MS COCO</td><td>1.8</td><td>3.7</td><td>3.7</td><td>3.8</td><td>4.4</td><td>1.9</td><td>3.9</td></tr>
<tr><td>ssdlite_<br>mobilenet_<br>v2-TF</td><td>MS COCO</td><td>1.4</td><td>2.3</td><td>3.9</td><td>1.7</td><td>2.5</td><td>2.2</td><td>3.4</td></tr>
<tr><td>yolo_v3-TF</td><td>MS COCO</td><td>1.8</td><td>3.8</td><td>4.0</td><td>1.9</td><td>3.9</td><td>3.6</td></tr>
<tr><td>yolo_v4-TF</td><td>MS COCO</td><td>1.7</td><td>3.4</td><td>1.7</td><td>2.8</td></tr>
<tr><td>unet-camvid-onnx-0001</td><td>MS COCO</td><td>1.6</td><td>3.8</td><td>1.6</td><td>3.7</td></tr>
</table>
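
A speed-up factor in this table is simply the ratio of the model's throughput after switching precision to its FP32 baseline on the same platform. A minimal sketch of the arithmetic, with assumed numbers:

```python
# Speed-up factor = INT8 throughput / FP32 throughput on the same platform.
fp32_fps, int8_fps = 100.0, 180.0   # assumed measurements, not values from this page
print(f"speed-up: {int8_fps / fp32_fps:.1f}x")  # -> 1.8x, the scale used above
```
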
@@ -187,7 +203,7 @@ The following table shows the absolute accuracy drop that is calculated as the d
<th></th>
<th>Intel® Core™ <br>i9-10920X CPU<br>@ 3.50GHz (VNNI)</th>
<th>Intel® Core™ <br>i9-9820X CPU<br>@ 3.30GHz (AVX512)</th>
<th>Intel® Core™ <br>i7-6700 CPU<br>@ 4.0GHz (AVX2)</th>
<th>Intel® Core™ <br>i7-6700K CPU<br>@ 4.0GHz (AVX2)</th>
<th>Intel® Core™ <br>i7-1185G7 CPU<br>@ 4.0GHz (TGL VNNI)</th>
</tr>
<tr align="left">
@@ -196,176 +212,203 @@ The following table shows the absolute accuracy drop that is calculated as the d
<th>Metric Name</th>
<th colspan="4" align="center">Absolute Accuracy Drop, %</th>
</tr>
<tr><td>bert-large-uncased-whole-word-masking-squad-0001</td><td>SQuAD</td><td>F1</td><td>0.62</td><td>0.88</td><td>0.52</td><td>0.62</td></tr>
<tr><td>brain-tumor-<br>segmentation-<br>0001-MXNET</td><td>BraTS</td><td>Dice-index@ <br>Mean@ <br>Overall Tumor</td><td>0.08</td><td>0.08</td><td>0.08</td><td>0.08</td><td>0.09</td><td>0.10</td><td>0.11</td><td>0.09</td></tr>
<tr><td>deeplabv3-TF</td><td>VOC 2012<br>Segmentation</td><td>mean_iou</td><td>0.73</td><td>1.10</td><td>1.10</td><td>0.73</td><td>0.09</td><td>0.41</td><td>0.41</td><td>0.09</td></tr>
<tr><td>densenet-121-TF</td><td>ImageNet</td><td>acc@top-1</td><td>0.73</td><td>0.72</td><td>0.72</td><td>0.73</td><td>0.54</td><td>0.57</td><td>0.57</td><td>0.54</td></tr>
<tr><td>facenet-<br>20180408-<br>102900-TF</td><td>LFW</td><td>pairwise_<br>accuracy<br>_subsets</td><td>0.02</td><td>0.02</td><td>0.02</td><td>0.47</td><td>0.05</td><td>0.12</td><td>0.12</td><td>0.05</td></tr>
<tr><td>faster_rcnn_<br>resnet50_coco-TF</td><td>MS COCO</td><td>coco_<br>precision</td><td>0.21</td><td>0.20</td><td>0.20</td><td>0.21</td><td>0.04</td><td>0.04</td><td>0.04</td><td>0.04</td></tr>
<tr><td>googlenet-v1-TF</td><td>ImageNet</td><td>acc@top-1</td><td>0.03</td><td>0.01</td><td>0.00</td><td>0.00</td><td>0.01</td><td>0.03</td></tr>
<tr><td>inception-v3-TF</td><td>ImageNet</td><td>acc@top-1</td><td>0.03</td><td>0.01</td><td>0.01</td><td>0.03</td><td>0.04</td><td>0.00</td><td>0.00</td><td>0.04</td></tr>
<tr><td>mobilenet-<br>ssd-CF</td><td>VOC2012</td><td>mAP</td><td>0.35</td><td>0.34</td><td>0.34</td><td>0.35</td><td>0.77</td><td>0.77</td><td>0.77</td><td>0.77</td></tr>
<tr><td>mobilenet-v1-1.0-<br>224-TF</td><td>ImageNet</td><td>acc@top-1</td><td>0.27</td><td>0.20</td><td>0.20</td><td>0.27</td><td>0.26</td><td>0.28</td><td>0.28</td><td>0.26</td></tr>
<tr><td>mobilenet-v2-1.0-<br>224-TF</td><td>ImageNet</td><td>acc@top-1</td><td>0.44</td><td>0.92</td><td>0.92</td><td>0.44</td><td>0.40</td><td>0.76</td><td>0.76</td><td>0.40</td></tr>
<tr><td>mobilenet-v2-<br>PYTORCH</td><td>ImageNet</td><td>acc@top-1</td><td>0.25</td><td>7.42</td><td>7.42</td><td>0.25</td><td>0.36</td><td>0.52</td><td>0.52</td><td>0.36</td></tr>
<tr><td>resnet-18-<br>pytorch</td><td>ImageNet</td><td>acc@top-1</td><td>0.26</td><td>0.25</td><td>0.25</td><td>0.26</td><td>0.25</td><td>0.25</td></tr>
<tr><td>resnet-50-<br>PYTORCH</td><td>ImageNet</td><td>acc@top-1</td><td>0.18</td><td>0.19</td><td>0.21</td><td>0.21</td><td>0.19</td><td>0.18</td></tr>
<tr><td>resnet-50-<br>TF</td><td>ImageNet</td><td>acc@top-1</td><td>0.15</td><td>0.11</td><td>0.11</td><td>0.15</td><td>0.10</td><td>0.08</td><td>0.08</td><td>0.10</td></tr>
<tr><td>squeezenet1.1-<br>CF</td><td>ImageNet</td><td>acc@top-1</td><td>0.63</td><td>0.66</td><td>0.64</td><td>0.64</td><td>0.66</td><td>0.63</td></tr>
<tr><td>ssd_mobilenet_<br>v1_coco-tf</td><td>VOC2012</td><td>COCO mAp</td><td>0.24</td><td>3.07</td><td>3.07</td><td>0.24</td><td>0.18</td><td>3.06</td><td>3.06</td><td>0.18</td></tr>
<tr><td>ssd300-CF</td><td>MS COCO</td><td>COCO mAp</td><td>0.06</td><td>0.05</td><td>0.05</td><td>0.06</td><td>0.05</td><td>0.05</td></tr>
<tr><td>ssdlite_<br>mobilenet_<br>v2-TF</td><td>MS COCO</td><td>COCO mAp</td><td>0.14</td><td>0.11</td><td>0.43</td><td>0.43</td><td>0.14</td><td>0.11</td></tr>
<tr><td>yolo_v3-TF</td><td>MS COCO</td><td>COCO mAp</td><td>0.12</td><td>0.35</td><td>0.35</td><td>0.12</td><td>0.11</td><td>0.24</td><td>0.24</td><td>0.11</td></tr>
<tr><td>yolo_v4-TF</td><td>MS COCO</td><td>COCO mAp</td><td>0.01</td><td>0.09</td><td>0.09</td><td>0.01</td></tr>
<tr><td>unet-camvid-<br>onnx-0001</td><td>MS COCO</td><td>COCO mAp</td><td>0.31</td><td>0.31</td><td>0.31</td><td>0.31</td></tr>
</table>
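
Each value above is the absolute difference between the metric measured on the FP32 model and on the INT8 model, in percentage points. A minimal sketch with assumed inputs:

```python
# Absolute accuracy drop = |metric(FP32) - metric(INT8)|, in percentage points.
def absolute_accuracy_drop(fp32_metric: float, int8_metric: float) -> float:
    return abs(fp32_metric - int8_metric)

# Assumed top-1 accuracies; 76.45% vs 76.30% gives the 0.15 scale seen above.
print(f"{absolute_accuracy_drop(76.45, 76.30):.2f}")  # 0.15
```
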
@@ -52,7 +52,14 @@ limitations under the License.
<tab type="usergroup" title="Converting Your ONNX* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_ONNX">
<tab type="user" title="Convert ONNX* Faster R-CNN Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Faster_RCNN"/>
<tab type="user" title="Convert ONNX* Mask R-CNN Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_Mask_RCNN"/>
<tab type="user" title="Converting DLRM ONNX* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_DLRM"/>
<tab type="user" title="Convert ONNX* GPT-2 Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_GPT2"/>
<tab type="user" title="Convert DLRM ONNX* Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_DLRM"/>
<tab type="usergroup" title="Converting Your PyTorch* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_PyTorch">
<tab type="user" title="Convert PyTorch* QuartzNet Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_QuartzNet"/>
<tab type="user" title="Convert PyTorch* RNN-T Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT"/>
<tab type="user" title="Convert PyTorch* YOLACT Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT"/>
<tab type="user" title="Convert PyTorch* F3Net Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net"/>
</tab>
</tab>
<tab type="user" title="Model Optimizations Techniques" url="@ref openvino_docs_MO_DG_prepare_model_Model_Optimization_Techniques"/>
<tab type="user" title="Cutting off Parts of a Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Cutting_Model"/>
@@ -132,6 +139,11 @@ limitations under the License.
<tab type="user" title="Equal-1" url="@ref openvino_docs_ops_comparison_Equal_1"/>
<tab type="user" title="Erf-1" url="@ref openvino_docs_ops_arithmetic_Erf_1"/>
<tab type="user" title="Exp-1" url="@ref openvino_docs_ops_activation_Exp_1"/>
<tab type="user" title="ExperimentalDetectronDetectionOutput-6" url="@ref openvino_docs_ops_detection_ExperimentalDetectronDetectionOutput_6"/>
<tab type="user" title="ExperimentalDetectronGenerateProposalsSingleImage-6" url="@ref openvino_docs_ops_detection_ExperimentalDetectronGenerateProposalsSingleImage_6"/>
<tab type="user" title="ExperimentalDetectronPriorGridGenerator-6" url="@ref openvino_docs_ops_detection_ExperimentalDetectronPriorGridGenerator_6"/>
<tab type="user" title="ExperimentalDetectronROIFeatureExtractor-6" url="@ref openvino_docs_ops_detection_ExperimentalDetectronROIFeatureExtractor_6"/>
<tab type="user" title="ExperimentalDetectronTopKROIs-6" url="@ref openvino_docs_ops_sort_ExperimentalDetectronTopKROIs_6"/>
<tab type="user" title="ExtractImagePatches-3" url="@ref openvino_docs_ops_movement_ExtractImagePatches_3"/>
<tab type="user" title="FakeQuantize-1" url="@ref openvino_docs_ops_quantization_FakeQuantize_1"/>
<tab type="user" title="FloorMod-1" url="@ref openvino_docs_ops_arithmetic_FloorMod_1"/>
@@ -254,7 +266,6 @@ limitations under the License.
<tab id="deploying_inference" type="usergroup" title="Deploying Inference" url="@ref openvino_docs_IE_DG_Deep_Learning_Inference_Engine_DevGuide">
<!-- Inference Engine Developer Guide -->
<tab type="usergroup" title="Inference Engine Developer Guide" url="@ref openvino_docs_IE_DG_Deep_Learning_Inference_Engine_DevGuide">
<tab type="user" title="Introduction to Inference Engine" url="@ref openvino_docs_IE_DG_inference_engine_intro"/>
<tab type="user" title="Inference Engine API Changes History" url="@ref openvino_docs_IE_DG_API_Changes"/>
<tab type="user" title="Inference Engine Memory primitives" url="@ref openvino_docs_IE_DG_Memory_primitives"/>
<tab type="user" title="Inference Engine Device Query API" url="@ref openvino_docs_IE_DG_InferenceEngine_QueryAPI"/>
@@ -283,6 +294,7 @@ limitations under the License.
<tab type="usergroup" title="Utilities to Validate Your Converted Model" url="@ref openvino_inference_engine_tools_cross_check_tool_README">
<tab type="user" title="Using Cross Check Tool for Per-Layer Comparison Between Plugins" url="@ref openvino_inference_engine_tools_cross_check_tool_README"/>
</tab>
<tab type="user" title="Introduction to OpenVINO state API" url="@ref openvino_docs_IE_DG_network_state_intro"/>
<tab type="usergroup" title="Supported Devices" url="@ref openvino_docs_IE_DG_supported_plugins_Supported_Devices">
<tab type="usergroup" title="GPU Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_CL_DNN">
<tab type="user" title="RemoteBlob API of GPU Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_GPU_RemoteBlob_API"/>

@@ -100,11 +100,14 @@ limitations under the License.
<!-- Tuning for Performance -->
<tab type="usergroup" title="Tuning for Performance">
<!-- Performance Benchmarks -->
<tab type="usergroup" title="Performance Measures" url="@ref openvino_docs_performance_benchmarks">
<tab type="user" title="Performance Information Frequently Asked Questions" url="@ref openvino_docs_performance_benchmarks_faq"/>
<tab type="user" title="Download Performance Data Spreadsheet in MS Excel* Format" url="https://docs.openvinotoolkit.org/downloads/benchmark_files/OV-2021.2-Download-Excel.xlsx"/>
<tab type="user" title="INT8 vs. FP32 Comparison on Select Networks and Platforms" url="@ref openvino_docs_performance_int8_vs_fp32"/>
</tab>
<tab type="usergroup" title="Performance Benchmark Results" url="@ref openvino_docs_performance_benchmarks">
<tab type="usergroup" title="Intel® Distribution of OpenVINO™ toolkit Benchmark Results" url="@ref openvino_docs_performance_benchmarks_openvino">
<tab type="user" title="Performance Information Frequently Asked Questions" url="@ref openvino_docs_performance_benchmarks_faq"/>
<tab type="user" title="Download Performance Data Spreadsheet in MS Excel* Format" url="https://docs.openvinotoolkit.org/downloads/benchmark_files/OV-2021.3-Download-Excel.xlsx"/>
<tab type="user" title="INT8 vs. FP32 Comparison on Select Networks and Platforms" url="@ref openvino_docs_performance_int8_vs_fp32"/>
</tab>
<tab type="user" title="OpenVINO™ Model Server Benchmark Results" url="@ref openvino_docs_performance_benchmarks_ovms"/>
</tab>
<tab type="user" title="Performance Optimization Guide" url="@ref openvino_docs_optimization_guide_dldt_optimization_guide"/>
<!-- POT DevGuide -->
<xi:include href="pot_docs.xml" xpointer="xpointer(//tab[@id='pot'])">
@@ -166,6 +169,7 @@ limitations under the License.
<tab type="user" title="Hello Classification C Sample" url="@ref openvino_inference_engine_ie_bridges_c_samples_hello_classification_README"/>
<tab type="user" title="Image Classification Python* Sample" url="@ref openvino_inference_engine_ie_bridges_python_sample_classification_sample_README"/>
<tab type="user" title="Hello Reshape SSD C++ Sample" url="@ref openvino_inference_engine_samples_hello_reshape_ssd_README"/>
<tab type="user" title="Hello Reshape SSD Python Sample" url="@ref openvino_inference_engine_samples_python_hello_reshape_ssd_README"/>
<tab type="user" title="Hello NV12 Input Classification C++ Sample" url="@ref openvino_inference_engine_samples_hello_nv12_input_classification_README"/>
<tab type="user" title="Hello NV12 Input Classification C Sample" url="@ref openvino_inference_engine_ie_bridges_c_samples_hello_nv12_input_classification_README"/>
<tab type="user" title="Hello Query Device C++ Sample" url="@ref openvino_inference_engine_samples_hello_query_device_README"/>
@@ -181,7 +185,15 @@ limitations under the License.
<tab type="user" title="Benchmark C++ Tool" url="@ref openvino_inference_engine_samples_benchmark_app_README"/>
<tab type="user" title="Benchmark Python* Tool" url="@ref openvino_inference_engine_tools_benchmark_tool_README"/>
</tab>

<!-- Reference Implementations -->
<tab type="usergroup" title="Reference Implementations" url="">
<tab type="usergroup" title="Speech Library and Speech Recognition Demos" url="@ref openvino_inference_engine_samples_speech_libs_and_demos_Speech_libs_and_demos">
<tab type="user" title="Speech Library" url="@ref openvino_inference_engine_samples_speech_libs_and_demos_Speech_library"/>
<tab type="user" title="Offline Speech Recognition Demo" url="@ref openvino_inference_engine_samples_speech_libs_and_demos_Offline_speech_recognition_demo"/>
<tab type="user" title="Live Speech Recognition Demo" url="@ref openvino_inference_engine_samples_speech_libs_and_demos_Live_speech_recognition_demo"/>
<tab type="user" title="Kaldi* Statistical Language Model Conversion Tool" url="@ref openvino_inference_engine_samples_speech_libs_and_demos_Kaldi_SLM_conversion_tool"/>
</tab>
</tab>
<!-- DL Streamer Examples -->
<tab type="usergroup" title="DL Streamer Examples" url="@ref gst_samples_README">
<tab type="usergroup" title="Command Line Samples" url="">

@@ -12,11 +12,11 @@ This sample requires:

* PC with GNU/Linux* or Microsoft Windows* (Apple macOS* is supported but was not tested)
* OpenCV 4.2 or higher built with [Intel® Distribution of OpenVINO™ Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html) (building with [Intel® TBB](https://www.threadingbuildingblocks.org/intel-tbb-tutorial) is a plus)
* The following pre-trained models from the [Open Model Zoo](@ref omz_models_intel_index)
  * [face-detection-adas-0001](@ref omz_models_intel_face_detection_adas_0001_description_face_detection_adas_0001)
  * [facial-landmarks-35-adas-0002](@ref omz_models_intel_facial_landmarks_35_adas_0002_description_facial_landmarks_35_adas_0002)
* The following pre-trained models from the [Open Model Zoo](@ref omz_models_group_intel)
  * [face-detection-adas-0001](@ref omz_models_model_face_detection_adas_0001)
  * [facial-landmarks-35-adas-0002](@ref omz_models_model_facial_landmarks_35_adas_0002)

To download the models from the Open Model Zoo, use the [Model Downloader](@ref omz_tools_downloader_README) tool.
To download the models from the Open Model Zoo, use the [Model Downloader](@ref omz_tools_downloader) tool.

## Face Beautification Algorithm
We will implement a simple face beautification algorithm using a combination of modern Deep Learning techniques and traditional Computer Vision. The general idea behind the algorithm is to make the face skin smoother while preserving face features like eye and mouth contrast. The algorithm identifies parts of the face using DNN inference, applies different filters to the parts found, and then combines them into the final result using basic image arithmetic:
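
As a rough, non-G-API illustration of that combination (the actual sample builds a G-API graph in C++), the Python sketch below smooths the skin region and sharpens the rest; the skin mask is assumed to come from the DNN stage.

```python
# Simplified illustration of the combine step (not the sample's actual G-API code).
import cv2
import numpy as np

def beautify(face_bgr: np.ndarray, skin_mask: np.ndarray) -> np.ndarray:
    """skin_mask: float32 in [0, 1]; in the real pipeline it comes from DNN inference."""
    # Edge-preserving smoothing for the skin area.
    smoothed = cv2.bilateralFilter(face_bgr, d=9, sigmaColor=30, sigmaSpace=30)
    # Unsharp masking keeps features such as eyes and mouth crisp.
    blurred = cv2.GaussianBlur(face_bgr, (3, 3), 0)
    sharpened = cv2.addWeighted(face_bgr, 1.5, blurred, -0.5, 0)
    # Basic image arithmetic: blend the two results through the mask.
    mask3 = cv2.merge([skin_mask, skin_mask, skin_mask])
    out = smoothed * mask3 + sharpened * (1.0 - mask3)
    return np.clip(out, 0, 255).astype(np.uint8)
```
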
@@ -11,12 +11,12 @@ This sample requires:

* PC with GNU/Linux* or Microsoft Windows* (Apple macOS* is supported but was not tested)
* OpenCV 4.2 or higher built with [Intel® Distribution of OpenVINO™ Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html) (building with [Intel® TBB](https://www.threadingbuildingblocks.org/intel-tbb-tutorial) is a plus)
* The following pre-trained models from the [Open Model Zoo](@ref omz_models_intel_index):
  * [face-detection-adas-0001](@ref omz_models_intel_face_detection_adas_0001_description_face_detection_adas_0001)
  * [age-gender-recognition-retail-0013](@ref omz_models_intel_age_gender_recognition_retail_0013_description_age_gender_recognition_retail_0013)
  * [emotions-recognition-retail-0003](@ref omz_models_intel_emotions_recognition_retail_0003_description_emotions_recognition_retail_0003)
* The following pre-trained models from the [Open Model Zoo](@ref omz_models_group_intel):
  * [face-detection-adas-0001](@ref omz_models_model_face_detection_adas_0001)
  * [age-gender-recognition-retail-0013](@ref omz_models_model_age_gender_recognition_retail_0013)
  * [emotions-recognition-retail-0003](@ref omz_models_model_emotions_recognition_retail_0003)

To download the models from the Open Model Zoo, use the [Model Downloader](@ref omz_tools_downloader_README) tool.
To download the models from the Open Model Zoo, use the [Model Downloader](@ref omz_tools_downloader) tool.

## Introduction: Why G-API
Many computer vision algorithms run on a video stream rather than on individual images. Stream processing usually consists of multiple steps – like decode, preprocessing, detection, tracking, classification (on detected objects), and visualization – forming a *video processing pipeline*. Moreover, many steps of such a pipeline can run in parallel – modern platforms have different hardware blocks on the same chip, like decoders and GPUs, and extra accelerators can be plugged in as extensions, like Intel® Movidius™ Neural Compute Stick for deep learning offload.
@@ -26,7 +26,7 @@ Given all this manifold of options and a variety in video analytics algorithms,
Starting with version 4.2, OpenCV offers a solution to this problem. OpenCV G-API now can manage Deep Learning inference (a cornerstone of any modern analytics pipeline) together with traditional Computer Vision as well as video capturing/decoding, all in a single pipeline. G-API takes care of pipelining itself – so if the algorithm or platform changes, the execution model adapts to it automatically.

## Pipeline Overview
Our sample application is based on the [Interactive Face Detection](omz_demos_interactive_face_detection_demo_README) demo from Open Model Zoo. A simplified pipeline consists of the following steps:
Our sample application is based on the [Interactive Face Detection](@ref omz_demos_interactive_face_detection_demo_cpp) demo from Open Model Zoo. A simplified pipeline consists of the following steps:

1. Image acquisition and decode
2. Detection with preprocessing

@@ -9,13 +9,13 @@ In this guide, you will:

[DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a web-based graphical environment that enables you to easily use various sophisticated
OpenVINO™ toolkit components:
* [Model Downloader](@ref omz_tools_downloader_README) to download models from the [Intel® Open Model Zoo](@ref omz_models_intel_index)
* [Model Downloader](@ref omz_tools_downloader) to download models from the [Intel® Open Model Zoo](@ref omz_models_group_intel)
with pretrained models for a range of different tasks
* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) to transform models into
the Intermediate Representation (IR) format
* [Post-Training Optimization toolkit](@ref pot_README) to calibrate a model and then execute it in the
INT8 precision
* [Accuracy Checker](@ref omz_tools_accuracy_checker_README) to determine the accuracy of a model
* [Accuracy Checker](@ref omz_tools_accuracy_checker) to determine the accuracy of a model
* [Benchmark Tool](@ref openvino_inference_engine_samples_benchmark_app_README) to estimate inference performance on supported devices

![](./dl_workbench_img/openvino_dl_wb.png)
@@ -70,10 +70,10 @@ The simplified OpenVINO™ DL Workbench workflow is:

## Run Baseline Inference

This section illustrates a sample use case of how to infer a pretrained model from the [Intel® Open Model Zoo](@ref omz_models_intel_index) with an autogenerated noise dataset on a CPU device.

This section illustrates a sample use case of how to infer a pretrained model from the [Intel® Open Model Zoo](@ref omz_models_group_intel) with an autogenerated noise dataset on a CPU device.

\htmlonly
<iframe width="560" height="315" src="https://www.youtube.com/embed/9TRJwEmY0K4" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>

\endhtmlonly

Once you log in to the DL Workbench, create a project, which is a combination of a model, a dataset, and a target device. Follow the steps below:

@@ -18,7 +18,7 @@ In addition, demo scripts, code samples and demo applications are provided to he
* **[Code Samples](../IE_DG/Samples_Overview.md)** - Small console applications that show you how to:
  * Utilize specific OpenVINO capabilities in an application
  * Perform specific tasks, such as loading a model, running inference, querying specific device capabilities, and more.
* **[Demo Applications](@ref omz_demos_README)** - Console applications that provide robust application templates to help you implement specific deep learning scenarios. These applications involve increasingly complex processing pipelines that gather analysis data from several models that run inference simultaneously, such as detecting a person in a video stream along with detecting the person's physical attributes, such as age, gender, and emotional state.
* **[Demo Applications](@ref omz_demos)** - Console applications that provide robust application templates to help you implement specific deep learning scenarios. These applications involve increasingly complex processing pipelines that gather analysis data from several models that run inference simultaneously, such as detecting a person in a video stream along with detecting the person's physical attributes, such as age, gender, and emotional state.

## <a name="openvino-installation"></a>Intel® Distribution of OpenVINO™ toolkit Installation and Deployment Tools Directory Structure
This guide assumes you completed all Intel® Distribution of OpenVINO™ toolkit installation and configuration steps. If you have not yet installed and configured the toolkit, see [Install Intel® Distribution of OpenVINO™ toolkit for Linux*](../install_guides/installing-openvino-linux.md).
@@ -46,9 +46,9 @@ The primary tools for deploying your models and applications are installed to th
| `samples/` | Inference Engine samples. Contains source code for C++ and Python* samples and build scripts. See the [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md). |
| `src/` | Source files for CPU extensions.|
| `model_optimizer/` | Model Optimizer directory. Contains configuration scripts, scripts to run the Model Optimizer and other files. See the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). |
| `open_model_zoo/` | Open Model Zoo directory. Includes the Model Downloader tool to download [pre-trained OpenVINO](@ref omz_models_intel_index) and public models, OpenVINO models documentation, demo applications and the Accuracy Checker tool to evaluate model accuracy.|
| `open_model_zoo/` | Open Model Zoo directory. Includes the Model Downloader tool to download [pre-trained OpenVINO](@ref omz_models_group_intel) and public models, OpenVINO models documentation, demo applications and the Accuracy Checker tool to evaluate model accuracy.|
| `demos/` | Demo applications for inference scenarios. Also includes documentation and build scripts.|
| `intel_models/` | Pre-trained OpenVINO models and associated documentation. See the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index).|
| `intel_models/` | Pre-trained OpenVINO models and associated documentation. See the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_group_intel).|
| `tools/` | Model Downloader and Accuracy Checker tools. |
| `tools/` | Contains a symbolic link to the Model Downloader folder and auxiliary tools to work with your models: Calibration tool, Benchmark and Collect Statistics tools.|

@@ -197,7 +197,7 @@ Each demo and code sample is a separate application, but they use the same behav

* [Code Samples](../IE_DG/Samples_Overview.md) - Small console applications that show how to utilize specific OpenVINO capabilities within an application and execute specific tasks such as loading a model, running inference, querying specific device capabilities, and more.

* [Demo Applications](@ref omz_demos_README) - Console applications that provide robust application templates to support developers in implementing specific deep learning scenarios. They may also involve more complex processing pipelines that gather analysis from several models that run inference simultaneously. For example, concurrently detecting a person in a video stream and detecting attributes such as age, gender and/or emotions.
* [Demo Applications](@ref omz_demos) - Console applications that provide robust application templates to support developers in implementing specific deep learning scenarios. They may also involve more complex processing pipelines that gather analysis from several models that run inference simultaneously. For example, concurrently detecting a person in a video stream and detecting attributes such as age, gender and/or emotions.

Inputs you'll need to specify:
- **A compiled OpenVINO™ code sample or demo application** that runs inferencing against a model that has been run through the Model Optimizer, resulting in an IR, using the other inputs you provide.
@@ -209,7 +209,7 @@ Inputs you'll need to specify:

To perform sample inference, run the Image Classification code sample and Security Barrier Camera demo application that were automatically compiled when you ran the Image Classification and Inference Pipeline demo scripts. The binary files are in the `~/inference_engine_cpp_samples_build/intel64/Release` and `~/inference_engine_demos_build/intel64/Release` directories, respectively.

To run other sample code or demo applications, build them from the source files delivered as part of the OpenVINO toolkit. To learn how to build these, see the [Inference Engine Code Samples Overview](../IE_DG/Samples_Overview.md) and [Demo Applications Overview](@ref omz_demos_README) sections.
To run other sample code or demo applications, build them from the source files delivered as part of the OpenVINO toolkit. To learn how to build these, see the [Inference Engine Code Samples Overview](../IE_DG/Samples_Overview.md) and [Demo Applications Overview](@ref omz_demos) sections.
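
As an illustration of such a run, here is a minimal sketch using the compiled Image Classification sample — the binary name (`classification_sample_async`) follows the sample naming of this release, while the model and image paths are placeholders to replace with your own IR and test image:

```sh
cd ~/inference_engine_cpp_samples_build/intel64/Release
# Classify a local image on CPU with an IR produced by the Model Optimizer
./classification_sample_async -i ~/test_data/car.png -m ~/ir/squeezenet1.1.xml -d CPU
```
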
### <a name="download-models"></a> Step 1: Download the Models
@@ -219,7 +219,7 @@ You must have a model that is specific for your inference task. Example model typ
- Custom (Often based on SSD)

Options to find a model suitable for the OpenVINO™ toolkit are:
- Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using [Model Downloader tool](@ref omz_tools_downloader_README).
- Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using [Model Downloader tool](@ref omz_tools_downloader).
- Download from GitHub*, Caffe* Zoo, TensorFlow* Zoo, etc.
- Train your own model.
@@ -449,7 +449,7 @@ Throughput: 375.3339402 FPS

### <a name="run-security-barrier"></a>Step 5: Run the Security Barrier Camera Demo Application

> **NOTE**: The Security Barrier Camera Demo Application is automatically compiled when you run the Inference Pipeline demo scripts. If you want to build it manually, see the [Demo Applications Overview](@ref omz_demos_README) section.
> **NOTE**: The Security Barrier Camera Demo Application is automatically compiled when you run the Inference Pipeline demo scripts. If you want to build it manually, see the [Demo Applications Overview](@ref omz_demos) section.

To run the **Security Barrier Camera Demo Application** using an input image on the prepared IRs:
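
The steps themselves are in the linked guide; as a hedged sketch of what the final invocation looks like — the demo binary and the `-i`/`-m` flags follow the Open Model Zoo demo conventions of this release, and the model and image names are placeholders:

```sh
cd ~/inference_engine_demos_build/intel64/Release
# Vehicle and license plate detection on a single input image, on CPU
./security_barrier_camera_demo -i car_1.bmp -m vehicle-license-plate-detection-barrier-0106.xml -d CPU
```
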
@@ -18,7 +18,7 @@ In addition, demo scripts, code samples and demo applications are provided to he

* **[Code Samples](../IE_DG/Samples_Overview.md)** - Small console applications that show you how to:
  * Utilize specific OpenVINO capabilities in an application.
  * Perform specific tasks, such as loading a model, running inference, querying specific device capabilities, and more.
* **[Demo Applications](@ref omz_demos_README)** - Console applications that provide robust application templates to help you implement specific deep learning scenarios. These applications involve increasingly complex processing pipelines that gather analysis data from several models that run inference simultaneously, such as detecting a person in a video stream along with detecting the person's physical attributes, such as age, gender, and emotional state.
* **[Demo Applications](@ref omz_demos)** - Console applications that provide robust application templates to help you implement specific deep learning scenarios. These applications involve increasingly complex processing pipelines that gather analysis data from several models that run inference simultaneously, such as detecting a person in a video stream along with detecting the person's physical attributes, such as age, gender, and emotional state.

## <a name="openvino-installation"></a>Intel® Distribution of OpenVINO™ toolkit Installation and Deployment Tools Directory Structure
This guide assumes you completed all Intel® Distribution of OpenVINO™ toolkit installation and configuration steps. If you have not yet installed and configured the toolkit, see [Install Intel® Distribution of OpenVINO™ toolkit for macOS*](../install_guides/installing-openvino-macos.md).
@@ -48,9 +48,9 @@ The primary tools for deploying your models and applications are installed to th

| `~intel_models/` | Symbolic link to the `intel_models` subfolder of the `open_model_zoo` folder.|
| `model_optimizer/` | Model Optimizer directory. Contains configuration scripts, scripts to run the Model Optimizer and other files. See the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).|
| `ngraph/` | nGraph directory. Includes the nGraph header and library files. |
| `open_model_zoo/` | Open Model Zoo directory. Includes the Model Downloader tool to download [pre-trained OpenVINO](@ref omz_models_intel_index) and public models, OpenVINO models documentation, demo applications and the Accuracy Checker tool to evaluate model accuracy.|
| `open_model_zoo/` | Open Model Zoo directory. Includes the Model Downloader tool to download [pre-trained OpenVINO](@ref omz_models_group_intel) and public models, OpenVINO models documentation, demo applications and the Accuracy Checker tool to evaluate model accuracy.|
| `demos/` | Demo applications for inference scenarios. Also includes documentation and build scripts.|
| `intel_models/` | Pre-trained OpenVINO models and associated documentation. See the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index).|
| `intel_models/` | Pre-trained OpenVINO models and associated documentation. See the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_group_intel).|
| `models` | Intel's trained and public models that can be obtained with Model Downloader.|
| `tools/` | Model Downloader and Accuracy Checker tools. |
| `tools/` | Contains a symbolic link to the Model Downloader folder and auxiliary tools to work with your models: Calibration tool, Benchmark and Collect Statistics tools.|
@@ -200,7 +200,7 @@ Inputs you need to specify when using a code sample or demo application:

To perform sample inference, run the Image Classification code sample and Security Barrier Camera demo application that are automatically compiled when you run the Image Classification and Inference Pipeline demo scripts. The binary files are in the `~/inference_engine_samples_build/intel64/Release` and `~/inference_engine_demos_build/intel64/Release` directories, respectively.

You can also build all available sample code and demo applications from the source files delivered with the OpenVINO toolkit. To learn how to do this, see the instructions in the [Inference Engine Code Samples Overview](../IE_DG/Samples_Overview.md) and [Demo Applications Overview](@ref omz_demos_README) sections.
You can also build all available sample code and demo applications from the source files delivered with the OpenVINO toolkit. To learn how to do this, see the instructions in the [Inference Engine Code Samples Overview](../IE_DG/Samples_Overview.md) and [Demo Applications Overview](@ref omz_demos) sections.

### <a name="download-models"></a> Step 1: Download the Models
@@ -210,7 +210,7 @@ You must have a model that is specific for your inference task. Example model typ
- Custom (Often based on SSD)

Options to find a model suitable for the OpenVINO™ toolkit are:
- Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using [Model Downloader tool](@ref omz_tools_downloader_README).
- Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using [Model Downloader tool](@ref omz_tools_downloader).
- Download from GitHub*, Caffe* Zoo, TensorFlow* Zoo, and other resources.
- Train your own model.
@@ -422,7 +422,7 @@ classid probability label

### <a name="run-security-barrier"></a>Step 5: Run the Security Barrier Camera Demo Application

> **NOTE**: The Security Barrier Camera Demo Application is automatically compiled when you run the Inference Pipeline demo scripts. If you want to build it manually, see the instructions in the [Demo Applications Overview](@ref omz_demos_README) section.
> **NOTE**: The Security Barrier Camera Demo Application is automatically compiled when you run the Inference Pipeline demo scripts. If you want to build it manually, see the instructions in the [Demo Applications Overview](@ref omz_demos) section.

To run the **Security Barrier Camera Demo Application** using an input image on the prepared IRs:
@@ -43,8 +43,8 @@ The primary tools for deploying your models and applications are installed to th
The OpenVINO™ workflow on Raspbian* OS is as follows:
1. **Get a pre-trained model** for your inference task. If you want to use your model for inference, the model must be converted to the `.bin` and `.xml` Intermediate Representation (IR) files, which are used as input by Inference Engine. On Raspberry Pi, OpenVINO™ toolkit includes only the Inference Engine module. The Model Optimizer is not supported on this platform. To get the optimized models you can use one of the following options:

  * Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using [Model Downloader tool](@ref omz_tools_downloader_README).
  <br> For more information on pre-trained models, see [Pre-Trained Models Documentation](@ref omz_models_intel_index)
  * Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using [Model Downloader tool](@ref omz_tools_downloader).
  <br> For more information on pre-trained models, see [Pre-Trained Models Documentation](@ref omz_models_group_intel)

  * Convert a model using the Model Optimizer from a full installation of Intel® Distribution of OpenVINO™ toolkit on one of the supported platforms. Installation instructions are available:
    * [Installation Guide for macOS*](../install_guides/installing-openvino-macos.md)
@@ -62,10 +62,10 @@ Follow the steps below to run pre-trained Face Detection network using Inference
```
2. Build the Object Detection Sample with the following command:
```sh
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-march=armv7-a" /opt/intel/openvino/deployment_tools/inference_engine/samples/cpp
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-march=armv7-a" /opt/intel/openvino_2021/deployment_tools/inference_engine/samples/cpp
make -j2 object_detection_sample_ssd
```
3. Download the pre-trained Face Detection model with the [Model Downloader tool](@ref omz_tools_downloader_README):
3. Download the pre-trained Face Detection model with the [Model Downloader tool](@ref omz_tools_downloader):
```sh
git clone --depth 1 https://github.com/openvinotoolkit/open_model_zoo
cd open_model_zoo/tools/downloader
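# The remaining download steps are cut off by this diff hunk; a plausible
# continuation, assuming the downloader script and model name documented
# for this release of the Open Model Zoo:
python3 -m pip install -r requirements.in
python3 downloader.py --name face-detection-adas-0001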
@@ -19,7 +19,7 @@ In addition, demo scripts, code samples and demo applications are provided to he

* **[Code Samples](../IE_DG/Samples_Overview.md)** - Small console applications that show you how to:
  * Utilize specific OpenVINO capabilities in an application.
  * Perform specific tasks, such as loading a model, running inference, querying specific device capabilities, and more.
* **[Demo Applications](@ref omz_demos_README)** - Console applications that provide robust application templates to help you implement specific deep learning scenarios. These applications involve increasingly complex processing pipelines that gather analysis data from several models that run inference simultaneously, such as detecting a person in a video stream along with detecting the person's physical attributes, such as age, gender, and emotional state.
* **[Demo Applications](@ref omz_demos)** - Console applications that provide robust application templates to help you implement specific deep learning scenarios. These applications involve increasingly complex processing pipelines that gather analysis data from several models that run inference simultaneously, such as detecting a person in a video stream along with detecting the person's physical attributes, such as age, gender, and emotional state.

## <a name="openvino-installation"></a>Intel® Distribution of OpenVINO™ toolkit Installation and Deployment Tools Directory Structure
This guide assumes you completed all Intel® Distribution of OpenVINO™ toolkit installation and configuration steps. If you have not yet installed and configured the toolkit, see [Install Intel® Distribution of OpenVINO™ toolkit for Windows*](../install_guides/installing-openvino-windows.md).
@@ -45,9 +45,9 @@ The primary tools for deploying your models and applications are installed to th

| `~intel_models\` | Symbolic link to the `intel_models` subfolder of the `open_model_zoo` folder. |
| `model_optimizer\` | Model Optimizer directory. Contains configuration scripts, scripts to run the Model Optimizer and other files. See the [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). |
| `ngraph\` | nGraph directory. Includes the nGraph header and library files. |
| `open_model_zoo\` | Open Model Zoo directory. Includes the Model Downloader tool to download [pre-trained OpenVINO](@ref omz_models_intel_index) and public models, OpenVINO models documentation, demo applications and the Accuracy Checker tool to evaluate model accuracy.|
| `open_model_zoo\` | Open Model Zoo directory. Includes the Model Downloader tool to download [pre-trained OpenVINO](@ref omz_models_group_intel) and public models, OpenVINO models documentation, demo applications and the Accuracy Checker tool to evaluate model accuracy.|
| `demos\` | Demo applications for inference scenarios. Also includes documentation and build scripts.|
| `intel_models\` | Pre-trained OpenVINO models and associated documentation. See the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index).|
| `intel_models\` | Pre-trained OpenVINO models and associated documentation. See the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_group_intel).|
| `models` | Intel's trained and public models that can be obtained with Model Downloader.|
| `tools\` | Model Downloader and Accuracy Checker tools. |
| `tools\` | Contains a symbolic link to the Model Downloader folder and auxiliary tools to work with your models: Calibration tool, Benchmark and Collect Statistics tools.|
@@ -199,7 +199,7 @@ Inputs you need to specify when using a code sample or demo application:

To perform sample inference, run the Image Classification code sample and Security Barrier Camera demo application that are automatically compiled when you run the Image Classification and Inference Pipeline demo scripts. The binary files are in the `C:\Users\<USER_ID>\Intel\OpenVINO\inference_engine_cpp_samples_build\intel64\Release` and `C:\Users\<USER_ID>\Intel\OpenVINO\inference_engine_demos_build\intel64\Release` directories, respectively.

You can also build all available sample code and demo applications from the source files delivered with the OpenVINO™ toolkit. To learn how to do this, see the instructions in the [Inference Engine Code Samples Overview](../IE_DG/Samples_Overview.md) and [Demo Applications Overview](@ref omz_demos_README) sections.
You can also build all available sample code and demo applications from the source files delivered with the OpenVINO™ toolkit. To learn how to do this, see the instructions in the [Inference Engine Code Samples Overview](../IE_DG/Samples_Overview.md) and [Demo Applications Overview](@ref omz_demos) sections.

### <a name="download-models"></a> Step 1: Download the Models
@@ -209,7 +209,7 @@ You must have a model that is specific for your inference task. Example model typ
- Custom (Often based on SSD)

Options to find a model suitable for the OpenVINO™ toolkit are:
- Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using the [Model Downloader tool](@ref omz_tools_downloader_README).
- Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using the [Model Downloader tool](@ref omz_tools_downloader).
- Download from GitHub*, Caffe* Zoo, TensorFlow* Zoo, and other resources.
- Train your own model.
@@ -425,7 +425,7 @@ classid probability label

### <a name="run-security-barrier"></a>Step 5: Run the Security Barrier Camera Demo Application

> **NOTE**: The Security Barrier Camera Demo Application is automatically compiled when you run the Inference Pipeline demo scripts. If you want to build it manually, see the instructions in the [Demo Applications Overview](@ref omz_demos_README) section.
> **NOTE**: The Security Barrier Camera Demo Application is automatically compiled when you run the Inference Pipeline demo scripts. If you want to build it manually, see the instructions in the [Demo Applications Overview](@ref omz_demos) section.

To run the **Security Barrier Camera Demo Application** using an input image on the prepared IRs:
@@ -44,7 +44,6 @@ To learn about what a *custom operation* is and how to work with them in the Deep

[](https://www.youtube.com/watch?v=Kl1ptVb7aI8)
<iframe width="560" height="315" src="https://www.youtube.com/embed/Kl1ptVb7aI8" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>

## Computer Vision with Intel

[](https://www.youtube.com/watch?v=FZZD4FCvO9c)
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0109b9cbc2908f786f6593de335c725f8ce5c800f37a7d79369408cc47eb8471
size 25725
oid sha256:e14f77f61f12c96ccf302667d51348a1e03579679155199910e3ebdf7d6adf06
size 37915
docs/img/performance_benchmarks_ovms_01.png (new file, 3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d86125db1e295334c04e92d0645c773f679d21bf52e25dce7c887fdf972b7a28
size 19154

docs/img/performance_benchmarks_ovms_02.png (new file, 3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bf8b156026d35b023e57c5cb3ea9136c93a819c1e2aa77be57d1619db4151065
size 373890

docs/img/throughput_ovms_3dunet.png (new file, 3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e5a472a62de53998194bc1471539139807e00cbb75fd9edc605e7ed99b5630af
size 18336

docs/img/throughput_ovms_bertlarge_fp32.png (new file, 3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2f7c58da93fc7966e154bdade48d408401b097f4b0306b7c85aa4256ad72b59d
size 18118

docs/img/throughput_ovms_bertlarge_int8.png (new file, 3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:104d8cd5eac2d1714db85df9cba5c2cfcc113ec54d428cd6e979e75e10473be6
size 17924

docs/img/throughput_ovms_resnet50_fp32.png (new file, 3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3ad19ace847da73176f20f21052f9dd23fd65779f4e1027b2debdaf8fc772c00
size 18735

docs/img/throughput_ovms_resnet50_int8.png (new file, 3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:32116d6d1acc20d8cb2fa10e290e052e3146ba1290f1c5e4aaf16a85388b6ec6
size 19387
@@ -19,7 +19,7 @@ The following diagram illustrates the typical OpenVINO™ workflow (click to see
### Model Preparation, Conversion and Optimization

You can use your framework of choice to prepare and train a Deep Learning model or just download a pretrained model from the Open Model Zoo. The Open Model Zoo includes Deep Learning solutions to a variety of vision problems, including object recognition, face recognition, pose estimation, text detection, and action recognition, at a range of measured complexities.
Several of these pretrained models are also used in the [code samples](IE_DG/Samples_Overview.md) and [application demos](@ref omz_demos_README). To download models from the Open Model Zoo, the [Model Downloader](@ref omz_tools_downloader_README) tool is used.
Several of these pretrained models are also used in the [code samples](IE_DG/Samples_Overview.md) and [application demos](@ref omz_demos). To download models from the Open Model Zoo, the [Model Downloader](@ref omz_tools_downloader) tool is used.

One of the core components of the OpenVINO™ toolkit is the [Model Optimizer](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md), a cross-platform command-line
tool that converts a trained neural network from its source framework to an open-source, nGraph-compatible [Intermediate Representation (IR)](MO_DG/IR_and_opsets.md) for use in inference operations. The Model Optimizer imports models trained in popular frameworks such as Caffe*, TensorFlow*, MXNet*, Kaldi*, and ONNX* and performs a few optimizations to remove excess layers and group operations when possible into simpler, faster graphs.
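
As a concrete sketch of that conversion step — the `mo.py` entry point and the flags below follow the Model Optimizer CLI of this release, with a placeholder TensorFlow model file:

```sh
# Convert a frozen TensorFlow graph into an IR (.xml + .bin)
python3 /opt/intel/openvino_2021/deployment_tools/model_optimizer/mo.py \
    --input_model frozen_inference_graph.pb \
    --output_dir ./ir
```
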
@@ -27,16 +27,17 @@ tool that converts a trained neural network from its source framework to an open

If your neural network model contains layers that are not in the list of known layers for supported frameworks, you can adjust the conversion and optimization process through use of [Custom Layers](HOWTO/Custom_Layers_Guide.md).

Run the [Accuracy Checker utility](@ref omz_tools_accuracy_checker_README) either against source topologies or against the output representation to evaluate the accuracy of inference. The Accuracy Checker is also part of the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction), an integrated web-based performance analysis studio.
Run the [Accuracy Checker utility](@ref omz_tools_accuracy_checker) either against source topologies or against the output representation to evaluate the accuracy of inference. The Accuracy Checker is also part of the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction), an integrated web-based performance analysis studio.
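
A hedged usage sketch for that evaluation — `accuracy_check` and its `-c`/`-m`/`-s` options match the tool's documented CLI, while the file and directory names are placeholders:

```sh
# Evaluate a model against a dataset described in a YAML configuration
accuracy_check -c my_model.yml -m ./ir -s ./datasets
```
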
Useful documents for model optimization:
* [Model Optimizer Developer Guide](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
* [Intermediate Representation and Opsets](MO_DG/IR_and_opsets.md)
* [Custom Layers Guide](HOWTO/Custom_Layers_Guide.md)
* [Accuracy Checker utility](@ref omz_tools_accuracy_checker_README)
* [Accuracy Checker utility](@ref omz_tools_accuracy_checker)
* [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction)
* [Model Downloader](@ref omz_tools_downloader_README) utility
* [Pretrained Models (Open Model Zoo)](@ref omz_models_public_index)
* [Model Downloader](@ref omz_tools_downloader) utility
* [Intel's Pretrained Models (Open Model Zoo)](@ref omz_models_group_intel)
* [Public Pretrained Models (Open Model Zoo)](@ref omz_models_group_public)

### Running and Tuning Inference
The other core component of OpenVINO™ is the [Inference Engine](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md), which manages the loading and compiling of the optimized neural network model, runs inference operations on input data, and outputs the results. Inference Engine can execute synchronously or asynchronously, and its plugin architecture manages the appropriate compilations for execution on multiple Intel® devices, including both workhorse CPUs and specialized graphics and video processing platforms (see below, Packaging and Deployment).
@@ -46,7 +47,7 @@ You can use OpenVINO™ Tuning Utilities with the Inference Engine to trial and
For a full browser-based studio integrating these other key tuning utilities, try the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction).

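Among those tuning utilities is the Benchmark App; a minimal sketch, assuming an IR at a placeholder path:

```sh
# Measure throughput and latency of an IR on CPU; -niter caps iterations
./benchmark_app -m ./ir/model.xml -d CPU -niter 100
```
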
OpenVINO™ toolkit includes a set of [inference code samples](IE_DG/Samples_Overview.md) and [application demos](@ref omz_demos_README) showing how inference is run and output processed for use in retail environments, classrooms, smart camera applications, and other solutions.
OpenVINO™ toolkit includes a set of [inference code samples](IE_DG/Samples_Overview.md) and [application demos](@ref omz_demos) showing how inference is run and output processed for use in retail environments, classrooms, smart camera applications, and other solutions.

OpenVINO also makes use of open-source and Intel® tools for traditional graphics processing and performance management. Intel® Media SDK supports accelerated rich-media processing, including transcoding. OpenVINO™ optimizes calls to the rich OpenCV and OpenVX libraries for processing computer vision workloads. And the new DL Streamer integration further accelerates video pipelining and performance.
@@ -54,7 +55,7 @@ Useful documents for inference tuning:
* [Inference Engine Developer Guide](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md)
* [Inference Engine API References](./api_references.html)
* [Inference Code Samples](IE_DG/Samples_Overview.md)
* [Application Demos](@ref omz_demos_README)
* [Application Demos](@ref omz_demos)
* [Post-Training Optimization Tool Guide](@ref pot_README)
* [Deep Learning Workbench Guide](@ref workbench_docs_Workbench_DG_Introduction)
* [Intel Media SDK](https://github.com/Intel-Media-SDK/MediaSDK)
@@ -82,15 +83,15 @@ The Inference Engine's plug-in architecture can be extended to meet other specia
Intel® Distribution of OpenVINO™ toolkit includes the following components:

- [Deep Learning Model Optimizer](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) - A cross-platform command-line tool for importing models and preparing them for optimal execution with the Inference Engine. The Model Optimizer imports, converts, and optimizes models, which were trained in popular frameworks, such as Caffe*, TensorFlow*, MXNet*, Kaldi*, and ONNX*.
- [Deep Learning Inference Engine](IE_DG/inference_engine_intro.md) - A unified API to allow high performance inference on many hardware types including Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, Intel® Vision Accelerator Design with Intel® Movidius™ vision processing unit (VPU).
- [Deep Learning Inference Engine](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) - A unified API to allow high performance inference on many hardware types including Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, Intel® Vision Accelerator Design with Intel® Movidius™ vision processing unit (VPU).
- [Inference Engine Samples](IE_DG/Samples_Overview.md) - A set of simple console applications demonstrating how to use the Inference Engine in your applications.
- [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) - A web-based graphical environment that allows you to easily use various sophisticated OpenVINO™ toolkit components.
- [Post-Training Optimization tool](@ref pot_README) - A tool to calibrate a model and then execute it in the INT8 precision.
- Additional Tools - A set of tools to work with your models including [Benchmark App](../inference-engine/tools/benchmark_tool/README.md), [Cross Check Tool](../inference-engine/tools/cross_check_tool/README.md), [Compile tool](../inference-engine/tools/compile_tool/README.md).
- [Open Model Zoo](@ref omz_models_intel_index)
  - [Demos](@ref omz_demos_README) - Console applications that provide robust application templates to help you implement specific deep learning scenarios.
  - Additional Tools - A set of tools to work with your models including [Accuracy Checker Utility](@ref omz_tools_accuracy_checker_README) and [Model Downloader](@ref omz_tools_downloader_README).
  - [Documentation for Pretrained Models](@ref omz_models_intel_index) - Documentation for pretrained models that are available in the [Open Model Zoo repository](https://github.com/opencv/open_model_zoo).
- [Open Model Zoo](@ref omz_models_group_intel)
  - [Demos](@ref omz_demos) - Console applications that provide robust application templates to help you implement specific deep learning scenarios.
  - Additional Tools - A set of tools to work with your models including [Accuracy Checker Utility](@ref omz_tools_accuracy_checker) and [Model Downloader](@ref omz_tools_downloader).
  - [Documentation for Pretrained Models](@ref omz_models_group_intel) - Documentation for pretrained models that are available in the [Open Model Zoo repository](https://github.com/opencv/open_model_zoo).
- Deep Learning Streamer (DL Streamer) – Streaming analytics framework, based on GStreamer, for constructing graphs of media analytics components. DL Streamer can be installed by the Intel® Distribution of OpenVINO™ toolkit installer. Its open source version is available on [GitHub](https://github.com/opencv/gst-video-analytics). For the DL Streamer documentation, see:
  - [DL Streamer Samples](@ref gst_samples_README)
  - [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/)
@@ -6,6 +6,31 @@ This guide provides installation steps for Intel® Distribution of OpenVINO™ t

> **NOTE**: Intel® Graphics Compute Runtime for OpenCL™ is not a part of OpenVINO™ APT distribution. You can install it from the [Intel® Graphics Compute Runtime for OpenCL™ GitHub repo](https://github.com/intel/compute-runtime).

## Included with Runtime Package

The following components are installed with the OpenVINO runtime package:

| Component | Description|
|-----------|------------|
| [Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md)| The engine that runs a deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
| [OpenCV*](https://docs.opencv.org/master/) | OpenCV* community version compiled for Intel® hardware. |
| Deep Learning Streamer (DL Streamer) | Streaming analytics framework, based on GStreamer, for constructing graphs of media analytics components. For the DL Streamer documentation, see [DL Streamer Samples](@ref gst_samples_README), [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/), [Elements](https://github.com/opencv/gst-video-analytics/wiki/Elements), [Tutorial](https://github.com/opencv/gst-video-analytics/wiki/DL%20Streamer%20Tutorial). |

## Included with Developer Package

The following components are installed with the OpenVINO developer package:

| Component | Description|
|-----------|------------|
| [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) | This tool imports, converts, and optimizes models that were trained in popular frameworks to a format usable by Intel tools, especially the Inference Engine. <br>Popular frameworks include Caffe\*, TensorFlow\*, MXNet\*, and ONNX\*. |
| [Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) | The engine that runs a deep learning model. It includes a set of libraries for an easy inference integration into your applications.|
| [OpenCV*](https://docs.opencv.org/master/) | OpenCV\* community version compiled for Intel® hardware |
| [Sample Applications](../IE_DG/Samples_Overview.md) | A set of simple console applications demonstrating how to use the Inference Engine in your applications. |
| [Demo Applications](@ref omz_demos) | A set of console applications that demonstrate how you can use the Inference Engine in your applications to solve specific use cases. |
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader) and other |
| [Documentation for Pre-Trained Models ](@ref omz_models_group_intel) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/opencv/open_model_zoo). |
| Deep Learning Streamer (DL Streamer) | Streaming analytics framework, based on GStreamer\*, for constructing graphs of media analytics components. For the DL Streamer documentation, see [DL Streamer Samples](@ref gst_samples_README), [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/), [Elements](https://github.com/opencv/gst-video-analytics/wiki/Elements), [Tutorial](https://github.com/opencv/gst-video-analytics/wiki/DL%20Streamer%20Tutorial). |

## Set up the Repository
### Install the GPG key for the repository
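
For reference, a hedged sketch of this step — take the exact key URL from the repository page for your release; the name below follows the 2021 pattern:

```sh
# Fetch and register Intel's public key for the OpenVINO APT repository
wget https://apt.repos.intel.com/openvino/2021/GPG-PUB-KEY-INTEL-OPENVINO-2021
sudo apt-key add GPG-PUB-KEY-INTEL-OPENVINO-2021
```
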
@@ -76,7 +101,7 @@ apt-cache search openvino
## Install the runtime or developer packages using the APT Package Manager
Intel® OpenVINO will be installed in: `/opt/intel/openvino_<VERSION>.<UPDATE>.<BUILD_NUM>`

A symlink will be created: `/opt/intel/openvino`
A symlink will be created: `/opt/intel/openvino_<VERSION>`

---
### To Install a specific version
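
A hedged example of pinning a version — the package name follows the `intel-openvino-dev-ubuntu<OS>-<VERSION>` pattern of this repository, and the exact version string is illustrative; run `apt-cache search openvino` to list what is actually available:

```sh
# Install one specific developer-package build on Ubuntu 18.04
sudo apt install intel-openvino-dev-ubuntu18-2021.3.394
```
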
@@ -10,8 +10,8 @@ This guide provides the steps for creating a Docker* image with Intel® Distribu

- Ubuntu\* 18.04 long-term support (LTS), 64-bit
- Ubuntu\* 20.04 long-term support (LTS), 64-bit
- CentOS\* 7
- RHEL\* 8
- CentOS\* 7.6
- Red Hat* Enterprise Linux* 8.2 (64 bit)

**Host Operating Systems**
@@ -21,7 +21,8 @@ This guide provides the steps for creating a Docker* image with Intel® Distribu

Prebuilt images are available on:
- [Docker Hub](https://hub.docker.com/u/openvino)
- [Quay.io](https://quay.io/organization/openvino)
- [Red Hat* Quay.io](https://quay.io/organization/openvino)
- [Red Hat* Ecosystem Catalog](https://catalog.redhat.com/software/containers/intel/openvino-runtime/606ff4d7ecb5241699188fb3)

## Use Docker* Image for CPU
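
A minimal sketch of getting started with one of the prebuilt CPU images — the `openvino/ubuntu18_runtime` repository name matches Docker Hub, while the tag is an assumption to match your release:

```sh
# Pull the runtime image and open a disposable interactive container
docker pull openvino/ubuntu18_runtime:2021.3
docker run -it --rm openvino/ubuntu18_runtime:2021.3
```
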
@@ -143,7 +144,7 @@ RUN /bin/mkdir -p '/usr/local/lib' && \

WORKDIR /opt/libusb-1.0.22/
RUN /usr/bin/install -c -m 644 libusb-1.0.pc '/usr/local/lib/pkgconfig' && \
    cp /opt/intel/openvino/deployment_tools/inference_engine/external/97-myriad-usbboot.rules /etc/udev/rules.d/ && \
    cp /opt/intel/openvino_2021/deployment_tools/inference_engine/external/97-myriad-usbboot.rules /etc/udev/rules.d/ && \
    ldconfig
```
- **CentOS 7**:
@@ -174,11 +175,11 @@ RUN /bin/mkdir -p '/usr/local/lib' && \
    /bin/mkdir -p '/usr/local/include/libusb-1.0' && \
    /usr/bin/install -c -m 644 libusb.h '/usr/local/include/libusb-1.0' && \
    /bin/mkdir -p '/usr/local/lib/pkgconfig' && \
    printf "\nexport LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:/usr/local/lib\n" >> /opt/intel/openvino/bin/setupvars.sh
    printf "\nexport LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:/usr/local/lib\n" >> /opt/intel/openvino_2021/bin/setupvars.sh

WORKDIR /opt/libusb-1.0.22/
RUN /usr/bin/install -c -m 644 libusb-1.0.pc '/usr/local/lib/pkgconfig' && \
    cp /opt/intel/openvino/deployment_tools/inference_engine/external/97-myriad-usbboot.rules /etc/udev/rules.d/ && \
    cp /opt/intel/openvino_2021/deployment_tools/inference_engine/external/97-myriad-usbboot.rules /etc/udev/rules.d/ && \
    ldconfig
```
2. Run the Docker* image:
@@ -11,9 +11,9 @@ For Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, the followi

1. Set the environment variables:
```sh
source /opt/intel/openvino/bin/setupvars.sh
source /opt/intel/openvino_2021/bin/setupvars.sh
```
> **NOTE**: The `HDDL_INSTALL_DIR` variable is set to `<openvino_install_dir>/deployment_tools/inference_engine/external/hddl`. If you installed the Intel® Distribution of OpenVINO™ to the default install directory, the `HDDL_INSTALL_DIR` was set to `/opt/intel/openvino//deployment_tools/inference_engine/external/hddl`.
> **NOTE**: The `HDDL_INSTALL_DIR` variable is set to `<openvino_install_dir>/deployment_tools/inference_engine/external/hddl`. If you installed the Intel® Distribution of OpenVINO™ to the default install directory, the `HDDL_INSTALL_DIR` was set to `/opt/intel/openvino_2021//deployment_tools/inference_engine/external/hddl`.

2. Install dependencies:
```sh
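# The dependency command itself is cut off by this diff hunk; a hedged
# sketch, assuming the helper script shipped under HDDL_INSTALL_DIR
# in this release:
sudo -E ${HDDL_INSTALL_DIR}/install_IVAD_VPU_dependencies.sh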
@@ -30,7 +30,7 @@ Now the dependencies are installed and you are ready to use the Intel® Vision A

## Optional Steps

* For advanced configuration steps for your IEI Mustang-V100-MX8 accelerator, see [Intel® Movidius™ VPUs Setup Guide for Use with Intel® Distribution of OpenVINO™ toolkit](movidius-setup-guide.md).
* For advanced configuration steps for your **IEI Mustang-V100-MX8-R10** accelerator, see [Intel® Movidius™ VPUs Setup Guide for Use with Intel® Distribution of OpenVINO™ toolkit](movidius-setup-guide.md). The **IEI Mustang-V100-MX8-R11** accelerator doesn't require any additional steps.

* After you've configured your Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, see [Intel® Movidius™ VPUs Programming Guide for Use with Intel® Distribution of OpenVINO™ toolkit](movidius-programming-guide.md) to learn how to distribute a model across all 8 VPUs to maximize performance.
@@ -52,7 +52,7 @@ E: [ncAPI] [ 965618] [MainThread] ncDeviceOpen:677 Failed to find a device,
```sh
kill -9 $(pidof hddldaemon autoboot)
pidof hddldaemon autoboot # Make sure none of them is alive
source /opt/intel/openvino/bin/setupvars.sh
source /opt/intel/openvino_2021/bin/setupvars.sh
${HDDL_INSTALL_DIR}/bin/bsl_reset
```
@@ -22,24 +22,24 @@ The Intel® Distribution of OpenVINO™ toolkit for Linux\*:
| Component | Description |
|-----------|-------------|
| [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) | This tool imports, converts, and optimizes models that were trained in popular frameworks to a format usable by Intel tools, especially the Inference Engine. <br>Popular frameworks include Caffe\*, TensorFlow\*, MXNet\*, and ONNX\*. |
| [Inference Engine](../IE_DG/inference_engine_intro.md) | This is the engine that runs the deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
| [Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) | This is the engine that runs the deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
| Intel® Media SDK | Offers access to hardware accelerated video codecs and frame processing |
| [OpenCV](https://docs.opencv.org/master/) | OpenCV\* community version compiled for Intel® hardware |
| [Inference Engine Code Samples](../IE_DG/Samples_Overview.md) | A set of simple console applications demonstrating how to utilize specific OpenVINO capabilities in an application and how to perform specific tasks, such as loading a model, running inference, querying specific device capabilities, and more. |
| [Demo Applications](@ref omz_demos_README) | A set of simple console applications that provide robust application templates to help you implement specific deep learning scenarios. |
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker_README), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader_README) and other |
| [Documentation for Pre-Trained Models ](@ref omz_models_intel_index) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/opencv/open_model_zoo). |
| [Demo Applications](@ref omz_demos) | A set of simple console applications that provide robust application templates to help you implement specific deep learning scenarios. |
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader) and other |
| [Documentation for Pre-Trained Models ](@ref omz_models_group_intel) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/opencv/open_model_zoo). |
| Deep Learning Streamer (DL Streamer) | Streaming analytics framework, based on GStreamer, for constructing graphs of media analytics components. For the DL Streamer documentation, see [DL Streamer Samples](@ref gst_samples_README), [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/), [Elements](https://github.com/opencv/gst-video-analytics/wiki/Elements), [Tutorial](https://github.com/opencv/gst-video-analytics/wiki/DL%20Streamer%20Tutorial). |

**Could Be Optionally Installed**

[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench) is a platform built upon OpenVINO™ that provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models on various Intel® architecture
configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components:
* [Model Downloader](@ref omz_tools_downloader_README)
* [Intel® Open Model Zoo](@ref omz_models_intel_index)
* [Model Downloader](@ref omz_tools_downloader)
* [Intel® Open Model Zoo](@ref omz_models_group_intel)
* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
* [Post-training Optimization Tool](@ref pot_README)
* [Accuracy Checker](@ref omz_tools_accuracy_checker_README)
* [Accuracy Checker](@ref omz_tools_accuracy_checker)
* [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md)

Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started.
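
A hedged starting point for that Docker route — the image name and port follow the DL Workbench documentation of this era; verify both against the linked installation guide:

```sh
# Start DL Workbench locally and expose its web UI on port 5665
docker run -p 127.0.0.1:5665:5665 --name workbench -it openvino/workbench:latest
```
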
@@ -49,7 +49,6 @@ Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_I
**Hardware**

* 6th to 11th generation Intel® Core™ processors and Intel® Xeon® processors
* Intel® Xeon® processor E family (formerly code named Sandy Bridge, Ivy Bridge, Haswell, and Broadwell)
* 3rd generation Intel® Xeon® Scalable processor (formerly code named Cooper Lake)
* Intel® Xeon® Scalable processor (formerly Skylake and Cascade Lake)
* Intel Atom® processor with support for Intel® Streaming SIMD Extensions 4.1 (Intel® SSE4.1)
@@ -67,6 +66,7 @@ Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_I
**Operating Systems**

- Ubuntu 18.04.x long-term support (LTS), 64-bit
- Ubuntu 20.04.0 long-term support (LTS), 64-bit
- CentOS 7.6, 64-bit (for target only)
- Yocto Project v3.0, 64-bit (for target only and requires modifications)
@@ -415,7 +415,7 @@ trusted-host = mirrors.aliyun.com
- [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
- [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md).
- For more information on Sample Applications, see the [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md).
- For information on a set of pre-trained models, see the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index)
- For information on a set of pre-trained models, see the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_group_intel)
- For IoT Libraries and Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit).

To learn more about converting models, go to:
@@ -24,22 +24,22 @@ The following components are installed by default:
| Component | Description |
| :--- | :--- |
| [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) | This tool imports, converts, and optimizes models, which were trained in popular frameworks, to a format usable by Intel tools, especially the Inference Engine. <br> Popular frameworks include Caffe*, TensorFlow*, MXNet\*, and ONNX\*. |
| [Inference Engine](../IE_DG/inference_engine_intro.md) | This is the engine that runs a deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
| [Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) | This is the engine that runs a deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
| [OpenCV\*](https://docs.opencv.org/master/) | OpenCV\* community version compiled for Intel® hardware |
| [Sample Applications](../IE_DG/Samples_Overview.md) | A set of simple console applications demonstrating how to use the Inference Engine in your applications. |
| [Demos](@ref omz_demos_README) | A set of console applications that demonstrate how you can use the Inference Engine in your applications to solve specific use-cases |
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker_README), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader_README) and other |
| [Documentation for Pre-Trained Models ](@ref omz_models_intel_index) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/opencv/open_model_zoo) |
| [Demos](@ref omz_demos) | A set of console applications that demonstrate how you can use the Inference Engine in your applications to solve specific use-cases |
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader) and other |
| [Documentation for Pre-Trained Models ](@ref omz_models_group_intel) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/opencv/open_model_zoo) |

**Could Be Optionally Installed**

[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench) is a platform built upon OpenVINO™ that provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models on various Intel® architecture
configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components:
* [Model Downloader](@ref omz_tools_downloader_README)
* [Intel® Open Model Zoo](@ref omz_models_intel_index)
* [Model Downloader](@ref omz_tools_downloader)
* [Intel® Open Model Zoo](@ref omz_models_group_intel)
* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
* [Post-training Optimization Tool](@ref pot_README)
* [Accuracy Checker](@ref omz_tools_accuracy_checker_README)
* [Accuracy Checker](@ref omz_tools_accuracy_checker)
* [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md)

Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started.
@@ -53,7 +53,6 @@ The development and target platforms have the same requirements, but you can sel
> **NOTE**: The current version of the Intel® Distribution of OpenVINO™ toolkit for macOS* supports inference on Intel CPUs and Intel® Neural Compute Sticks 2 only.

* 6th to 11th generation Intel® Core™ processors and Intel® Xeon® processors
* Intel® Xeon® processor E family (formerly code named Sandy Bridge, Ivy Bridge, Haswell, and Broadwell)
* 3rd generation Intel® Xeon® Scalable processor (formerly code named Cooper Lake)
* Intel® Xeon® Scalable processor (formerly Skylake and Cascade Lake)
* Intel® Neural Compute Stick 2
@@ -280,7 +279,7 @@ Follow the steps below to uninstall the Intel® Distribution of OpenVINO™ Tool

- To learn more about the verification applications, see `README.txt` in `/opt/intel/openvino_2021/deployment_tools/demo/`.

- For a detailed description of the pre-trained models, go to the [Overview of OpenVINO toolkit Pre-Trained Models](@ref omz_models_intel_index) page.
- For a detailed description of the pre-trained models, go to the [Overview of OpenVINO toolkit Pre-Trained Models](@ref omz_models_group_intel) page.

- More information on [sample applications](../IE_DG/Samples_Overview.md).
@@ -18,7 +18,7 @@ The OpenVINO toolkit for Raspbian OS is an archive with pre-installed header fil

| Component | Description |
| :--- | :--- |
| [Inference Engine](../IE_DG/inference_engine_intro.md) | This is the engine that runs the deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
| [Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) | This is the engine that runs the deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
| [OpenCV\*](https://docs.opencv.org/master/) | OpenCV\* community version compiled for Intel® hardware. |
| [Sample Applications](../IE_DG/Samples_Overview.md) | A set of simple console applications demonstrating how to use Intel's Deep Learning Inference Engine in your applications. |
@@ -94,12 +94,12 @@ CMake is installed. Continue to the next section to set the environment variable

You must update several environment variables before you can compile and run OpenVINO toolkit applications. Run the following script to temporarily set the environment variables:
```sh
source /opt/intel/openvino/bin/setupvars.sh
source /opt/intel/openvino_2021/bin/setupvars.sh
```

**(Optional)** The OpenVINO environment variables are removed when you close the shell. As an option, you can permanently set the environment variables as follows:
```sh
echo "source /opt/intel/openvino/bin/setupvars.sh" >> ~/.bashrc
echo "source /opt/intel/openvino_2021/bin/setupvars.sh" >> ~/.bashrc
```

To test your change, open a new terminal. You will see the following:
@@ -118,11 +118,11 @@ Continue to the next section to add USB rules for Intel® Neural Compute Stick 2
Log out and log in for it to take effect.
2. If you didn't modify `.bashrc` to permanently set the environment variables, run `setupvars.sh` again after logging in:
```sh
source /opt/intel/openvino/bin/setupvars.sh
source /opt/intel/openvino_2021/bin/setupvars.sh
```
3. To perform inference on the Intel® Neural Compute Stick 2, install the USB rules running the `install_NCS_udev_rules.sh` script:
```sh
sh /opt/intel/openvino/install_dependencies/install_NCS_udev_rules.sh
sh /opt/intel/openvino_2021/install_dependencies/install_NCS_udev_rules.sh
```
4. Plug in your Intel® Neural Compute Stick 2.
@@ -138,14 +138,13 @@ Follow the next steps to run pre-trained Face Detection network using Inference
   ```
2. Build the Object Detection Sample:
   ```sh
   cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-march=armv7-a" /opt/intel/openvino/deployment_tools/inference_engine/samples/cpp
   cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-march=armv7-a" /opt/intel/openvino_2021/deployment_tools/inference_engine/samples/cpp
   ```

   ```sh
   make -j2 object_detection_sample_ssd
   ```
3. Download the pre-trained Face Detection model with the Model Downloader or copy it from the host machine:
   ```sh
   git clone --depth 1 https://github.com/openvinotoolkit/open_model_zoo
   cd open_model_zoo/tools/downloader
   python3 -m pip install -r requirements.in
@@ -165,9 +164,9 @@ Read the next topic if you want to learn more about OpenVINO workflow for Raspbe

If you want to use your model for inference, the model must be converted to the .bin and .xml Intermediate Representation (IR) files that are used as input by Inference Engine. OpenVINO™ toolkit support on Raspberry Pi only includes the Inference Engine module of the Intel® Distribution of OpenVINO™ toolkit. The Model Optimizer is not supported on this platform. To get the optimized models, you can use one of the following options:

* Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using [Model Downloader tool](@ref omz_tools_downloader_README).
* Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using [Model Downloader tool](@ref omz_tools_downloader).

For more information on pre-trained models, see [Pre-Trained Models Documentation](@ref omz_models_intel_index)
For more information on pre-trained models, see [Pre-Trained Models Documentation](@ref omz_models_group_intel)

* Convert the model using the Model Optimizer from a full installation of Intel® Distribution of OpenVINO™ toolkit on one of the supported platforms. Installation instructions are available:
@@ -16,11 +16,10 @@ Your installation is complete when these are all completed:

2. Install the dependencies:

   - [Microsoft Visual Studio* with C++ **2019 or 2017** with MSBuild](http://visualstudio.microsoft.com/downloads/)
   - [CMake **3.10 or higher** 64-bit](https://cmake.org/download/)
   > **NOTE**: If you want to use Microsoft Visual Studio 2019, you are required to install CMake 3.14.
   - [Microsoft Visual Studio* 2019 with MSBuild](http://visualstudio.microsoft.com/downloads/)
   - [CMake 3.14 or higher 64-bit](https://cmake.org/download/)
   - [Python **3.6** - **3.8** 64-bit](https://www.python.org/downloads/windows/)
   > **IMPORTANT**: As part of this installation, make sure you click the option to add the application to your `PATH` environment variable.
   > **IMPORTANT**: As part of this installation, make sure you click the option **[Add Python 3.x to PATH](https://docs.python.org/3/using/windows.html#installation-steps)** to add Python to your `PATH` environment variable.

3. <a href="#set-the-environment-variables">Set Environment Variables</a>
@@ -58,22 +57,22 @@ The following components are installed by default:

| Component | Description |
|:---------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|[Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) |This tool imports, converts, and optimizes models that were trained in popular frameworks to a format usable by Intel tools, especially the Inference Engine.<br><strong>NOTE</strong>: Popular frameworks include such frameworks as Caffe\*, TensorFlow\*, MXNet\*, and ONNX\*. |
|[Inference Engine](../IE_DG/inference_engine_intro.md) |This is the engine that runs the deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
|[Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) |This is the engine that runs the deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
|[OpenCV\*](https://docs.opencv.org/master/) |OpenCV* community version compiled for Intel® hardware |
|[Inference Engine Samples](../IE_DG/Samples_Overview.md) |A set of simple console applications demonstrating how to use Intel's Deep Learning Inference Engine in your applications. |
| [Demos](@ref omz_demos_README) | A set of console applications that demonstrate how you can use the Inference Engine in your applications to solve specific use-cases |
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker_README), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader_README) and other |
| [Documentation for Pre-Trained Models ](@ref omz_models_intel_index) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/opencv/open_model_zoo) |
| [Demos](@ref omz_demos) | A set of console applications that demonstrate how you can use the Inference Engine in your applications to solve specific use-cases |
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader) and other |
| [Documentation for Pre-Trained Models ](@ref omz_models_group_intel) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/opencv/open_model_zoo) |

**Could Be Optionally Installed**

[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models on various Intel® architecture configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components:
* [Model Downloader](@ref omz_tools_downloader_README)
* [Intel® Open Model Zoo](@ref omz_models_intel_index)
* [Model Downloader](@ref omz_tools_downloader)
* [Intel® Open Model Zoo](@ref omz_models_group_intel)
* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
* [Post-training Optimization Tool](@ref pot_README)
* [Accuracy Checker](@ref omz_tools_accuracy_checker_README)
* [Accuracy Checker](@ref omz_tools_accuracy_checker)
* [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md)

Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started.
@@ -83,7 +82,6 @@ Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_I
**Hardware**

* 6th to 11th generation Intel® Core™ processors and Intel® Xeon® processors
* Intel® Xeon® processor E family (formerly code named Sandy Bridge, Ivy Bridge, Haswell, and Broadwell)
* 3rd generation Intel® Xeon® Scalable processor (formerly code named Cooper Lake)
* Intel® Xeon® Scalable processor (formerly Skylake and Cascade Lake)
* Intel Atom® processor with support for Intel® Streaming SIMD Extensions 4.1 (Intel® SSE4.1)
@@ -134,12 +132,9 @@ The screen example below indicates you are missing two dependencies:

You must update several environment variables before you can compile and run OpenVINO™ applications. Open the Command Prompt, and run the `setupvars.bat` batch file to temporarily set your environment variables:
```sh
cd C:\Program Files (x86)\Intel\openvino_2021\bin\
```

```sh
setupvars.bat
"C:\Program Files (x86)\Intel\openvino_2021\bin\setupvars.bat"
```
> **IMPORTANT**: Windows PowerShell* is not recommended for running the configuration commands. Please use the Command Prompt instead.

<strong>(Optional)</strong>: OpenVINO toolkit environment variables are removed when you close the Command Prompt window. As an option, you can permanently set the environment variables manually.
@@ -314,7 +309,7 @@ Use these steps to update your Windows `PATH` if a command you execute returns a

5. If you need to add CMake to the `PATH`, browse to the directory in which you installed CMake. The default directory is `C:\Program Files\CMake`.

6. If you need to add Python to the `PATH`, browse to the directory in which you installed Python. The default directory is `C:\Users\<USER_ID>\AppData\Local\Programs\Python\Python36\Python`.
6. If you need to add Python to the `PATH`, browse to the directory in which you installed Python. The default directory is `C:\Users\<USER_ID>\AppData\Local\Programs\Python\Python36\Python`. Note that the `AppData` folder is hidden by default. To view hidden files and folders, see the [Windows 10 instructions](https://support.microsoft.com/en-us/windows/view-hidden-files-and-folders-in-windows-10-97fbc472-c603-9d90-91d0-1166d1d9f4b5).

7. Click **OK** repeatedly to close each screen.
@@ -350,11 +345,11 @@ To learn more about converting deep learning models, go to:

- [Intel Distribution of OpenVINO Toolkit home page](https://software.intel.com/en-us/openvino-toolkit)
- [OpenVINO™ Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes)
- [Introduction to Inference Engine](../IE_DG/inference_engine_intro.md)
- [Introduction to Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md)
- [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md)
- [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
- [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md)
- [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index)
- [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_group_intel)
- [Intel® Neural Compute Stick 2 Get Started](https://software.intel.com/en-us/neural-compute-stick/get-started)
@@ -1,30 +1,28 @@
# Create a Yocto* Image with OpenVINO™ toolkit {#openvino_docs_install_guides_installing_openvino_yocto}
This document provides instructions for creating a Yocto* image with OpenVINO™ toolkit.

Instructions were validated and tested for [Yocto OpenVINO 2020.4 release](http://git.yoctoproject.org/cgit/cgit.cgi/meta-intel).
# Create a Yocto Image with Intel® Distribution of OpenVINO™ toolkit {#openvino_docs_install_guides_installing_openvino_yocto}
This document provides instructions for creating a Yocto image with Intel® Distribution of OpenVINO™ toolkit.

## System Requirements
Use the [Yocto Project* official documentation](https://www.yoctoproject.org/docs/latest/mega-manual/mega-manual.html#brief-compatible-distro) to set up and configure your host machine to be compatible with BitBake*.
Use the [Yocto Project official documentation](https://docs.yoctoproject.org/brief-yoctoprojectqs/index.html#compatible-linux-distribution) to set up and configure your host machine to be compatible with BitBake.

## Setup
## Step 1: Set Up Environment

### Set up Git repositories
### Set Up Git Repositories
The following Git repositories are required to build a Yocto image:

- [Poky](https://www.yoctoproject.org/docs/latest/mega-manual/mega-manual.html#poky)
- [Meta-intel](http://git.yoctoproject.org/cgit/cgit.cgi/meta-intel/tree/README)
- [Poky](https://git.yoctoproject.org/poky)
- [Meta-intel](https://git.yoctoproject.org/meta-intel/tree/README)
- [Meta-openembedded](http://cgit.openembedded.org/meta-openembedded/tree/README)
- <a href="https://github.com/kraj/meta-clang/blob/master/README.md">Meta-clang</a>

Clone these Git repositories to your host machine:
```sh
git clone https://git.yoctoproject.org/git/poky
git clone https://git.yoctoproject.org/git/meta-intel
git clone https://git.openembedded.org/meta-openembedded
git clone https://github.com/kraj/meta-clang.git
git clone https://git.yoctoproject.org/git/poky --branch hardknott
git clone https://git.yoctoproject.org/git/meta-intel --branch hardknott
git clone https://git.openembedded.org/meta-openembedded --branch hardknott
git clone https://github.com/kraj/meta-clang.git --branch hardknott
```

### Set up BitBake* Layers
### Set up BitBake Layers

```sh
source poky/oe-init-build-env
@@ -36,7 +34,7 @@ bitbake-layers add-layer ../meta-clang

### Set up BitBake Configurations

Include extra configuration in conf/local.conf in your build directory as required.
Include extra configuration in `conf/local.conf` in your build directory as required.

```sh
# Build with SSE4.2, AVX2 etc. extensions
@@ -67,22 +65,22 @@ CORE_IMAGE_EXTRA_INSTALL_append = " openvino-inference-engine-vpu-firmware"
CORE_IMAGE_EXTRA_INSTALL_append = " openvino-model-optimizer"
```

## Build a Yocto Image with OpenVINO Packages
## Step 2: Build a Yocto Image with OpenVINO Packages

Run BitBake to build the minimal image with OpenVINO packages:
Run BitBake to build your image with OpenVINO packages. To build the minimal image, for example, run:
```sh
bitbake core-image-minimal
```

## Verify the Created Yocto Image with OpenVINO Packages
## Step 3: Verify the Yocto Image with OpenVINO Packages

Verify that OpenVINO packages were built successfully.
Run 'oe-pkgdata-util list-pkgs | grep openvino' command.
Run the following command:
```sh
oe-pkgdata-util list-pkgs | grep openvino
```

Verify that it returns the list of packages below:
If the image was built successfully, it will return the list of packages as below:
```sh
openvino-inference-engine
openvino-inference-engine-dbg
@@ -90,7 +88,6 @@ openvino-inference-engine-dev
openvino-inference-engine-python3
openvino-inference-engine-samples
openvino-inference-engine-src
openvino-inference-engine-staticdev
openvino-inference-engine-vpu-firmware
openvino-model-optimizer
openvino-model-optimizer-dbg
@@ -6,6 +6,18 @@ This guide provides installation steps for the Intel® Distribution of OpenVINO

> **NOTE**: Intel® Graphics Compute Runtime for OpenCL™ is not a part of OpenVINO™ YUM distribution. You can install it from the [Intel® Graphics Compute Runtime for OpenCL™ GitHub repo](https://github.com/intel/compute-runtime).

> **NOTE**: Only runtime packages are available via the YUM repository.

## Included with Runtime Package

The following components are installed with the OpenVINO runtime package:

| Component | Description|
|-----------|------------|
| [Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md)| The engine that runs a deep learning model. It includes a set of libraries for an easy inference integration into your applications. |
| [OpenCV*](https://docs.opencv.org/master/) | OpenCV* community version compiled for Intel® hardware. |
| Deep Learning Streamer (DL Streamer) | Streaming analytics framework, based on GStreamer, for constructing graphs of media analytics components. For the DL Streamer documentation, see [DL Streamer Samples](@ref gst_samples_README), [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/), [Elements](https://github.com/opencv/gst-video-analytics/wiki/Elements), [Tutorial](https://github.com/opencv/gst-video-analytics/wiki/DL%20Streamer%20Tutorial). |

## Set up the Repository

> **NOTE:** You must be logged in as root to set up and install the repository.
@@ -61,7 +73,7 @@ Results:
intel-openvino-2021 Intel(R) Distribution of OpenVINO 2021
```

### To list the available OpenVINO packages
### To list available OpenVINO packages
Use the following command:
```sh
yum list intel-openvino*
@@ -69,11 +81,11 @@ yum list intel-openvino*

---

## Install the runtime packages Using the YUM Package Manager
## Install Runtime Packages Using the YUM Package Manager

Intel® OpenVINO will be installed in: `/opt/intel/openvino_<VERSION>.<UPDATE>.<BUILD_NUM>`
<br>
A symlink will be created: `/opt/intel/openvino`
A symlink will be created: `/opt/intel/openvino_<VERSION>`

---
@@ -1,5 +1,7 @@
# Intel® Movidius™ VPUs Setup Guide for Use with Intel® Distribution of OpenVINO™ toolkit {#openvino_docs_install_guides_movidius_setup_guide}

> **NOTE**: These steps are only required for the **IEI Mustang-V100-MX8-R10** card. The **IEI Mustang-V100-MX8-R11** card doesn't require any additional steps; it is completely configured using the [general guidance](installing-openvino-linux-ivad-vpu.md).

## See Also

- [Intel® Movidius™ VPUs Programming Guide for use with the Intel® Distribution of OpenVINO™](movidius-programming-guide.md)
@@ -9,7 +11,7 @@
- <a class="download" href="<domain_placeholder>/downloads/Intel Vision Accelerator Design with Intel Movidius™ VPUs Errata.pdf">Intel® Vision Accelerator Design with Intel® Movidius™ VPUs Errata</a>

The IEI Mustang-V100-MX8 is an OEM version of the Intel® Vision Accelerator Design with Intel® Movidius™ VPUs.
This guide assumes you have installed the [Mustang-V100-MX8](https://download.ieiworld.com/) and the [Intel® Distribution of OpenVINO™ Toolkit](https://software.intel.com/en-us/openvino-toolkit).
This guide assumes you have installed the [Mustang-V100-MX8](https://download.ieiworld.com/) and the [Intel® Distribution of OpenVINO™ Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html).

Instructions in this guide for configuring your accelerator include:
1. Installing the required IEI\* BSL reset software
@@ -44,7 +46,7 @@ The `hddldaemon` is a system service, a binary executable that is run to manage
`<IE>` refers to the following default OpenVINO™ Inference Engine directories:
- **Linux:**
```
/opt/intel/openvino/inference_engine
/opt/intel/openvino_2021/inference_engine
```
- **Windows:**
```
@@ -13,7 +13,7 @@ OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applicatio
| Component | Description |
|-----------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [Model Optimizer](https://docs.openvinotoolkit.org/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) | This tool imports, converts, and optimizes models that were trained in popular frameworks to a format usable by Intel tools, especially the Inference Engine. <br>Popular frameworks include Caffe\*, TensorFlow\*, MXNet\*, and ONNX\*. |
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](https://docs.openvinotoolkit.org/latest/omz_tools_accuracy_checker_README.html), [Post-Training Optimization Tool](https://docs.openvinotoolkit.org/latest/pot_README.html) |
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](https://docs.openvinotoolkit.org/latest/omz_tools_accuracy_checker.html), [Post-Training Optimization Tool](https://docs.openvinotoolkit.org/latest/pot_README.html) |

**The Runtime Package Includes the Following Components Installed by Dependency:**
@@ -24,14 +24,19 @@ OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applicatio

## System Requirements

* [Python* distribution](https://www.python.org/) 3.6, 3.7, 3.8
* Supported Operating Systems:
  - Ubuntu* 18.04 long-term support (LTS), 64-bit (python 3.6 or 3.7)
  - Ubuntu* 20.04 long-term support (LTS), 64-bit (python 3.6 or 3.7)
  - macOS* 10.15.x versions
  - Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or higher) editions
  - Windows Server* 2016 or higher
> NOTE: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated.
The table below lists the supported operating systems and Python* versions required to run the installation.

| Supported Operating System | [Python* Version (64-bit)](https://www.python.org/) |
| :------------------------------------------------------------| :---------------------------------------------------|
| Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
| Red Hat* Enterprise Linux* 8.2, 64-bit | 3.6, 3.7 |
| CentOS* 7.4, 64-bit | 3.6, 3.7 |
| macOS* 10.15.x versions | 3.6, 3.7, 3.8 |
| Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or higher) editions | 3.6, 3.7, 3.8 |
| Windows Server* 2016 or higher | 3.6, 3.7, 3.8 |

> **NOTE**: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated.

## Install the Developer Package
@@ -46,7 +51,11 @@ python -m pip install --user virtualenv
python -m venv openvino_env --system-site-packages
```

Activate virtual environment:<br>
> **NOTE**: On Linux and macOS, you may need to type `python3` instead of `python`. You may also need to [install pip](https://pip.pypa.io/en/stable/installing/).

### Step 2. Activate Virtual Environment

On Linux and macOS:
```sh
source openvino_env/bin/activate
@@ -56,14 +65,14 @@ On Windows:
openvino_env\Scripts\activate
```

### Step 2. Set Up and Update pip to the Highest Version
### Step 3. Set Up and Update pip to the Highest Version

Run the command below:
```sh
python -m pip install --upgrade pip
```

### Step 3. Install the Package
### Step 4. Install the Package

Run the command below: <br>
@@ -71,13 +80,13 @@ Run the command below: <br>
pip install openvino-dev
```

### Step 4. Verify that the Package is Installed
### Step 5. Verify that the Package is Installed

Run the command below:
Run the command below (this may take a few seconds):
```sh
python -c "pot -h"
pot -h
```

You will see the help message for the Post-Training Optimization Tool if installation finished successfully.

## Additional Resources
@@ -85,4 +94,3 @@ You will see the help message for Post-Training Optimization Tool if installatio
- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)
- OpenVINO™ toolkit online documentation: [https://docs.openvinotoolkit.org](https://docs.openvinotoolkit.org)
@@ -21,14 +21,19 @@ The Intel® Distribution of OpenVINO™ toolkit for Linux\*:

## System Requirements

* [Python* distribution](https://www.python.org/) 3.6, 3.7, 3.8
* Supported Operating Systems:
  - Ubuntu* 18.04 long-term support (LTS), 64-bit (python 3.6 or 3.7)
  - Ubuntu* 20.04 long-term support (LTS), 64-bit (python 3.6 or 3.7)
  - macOS* 10.15.x version
  - Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or higher) editions
  - Windows Server* 2016 or higher
> NOTE: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated
The table below lists the supported operating systems and Python* versions required to run the installation.

| Supported Operating System | [Python* Version (64-bit)](https://www.python.org/) |
| :------------------------------------------------------------| :---------------------------------------------------|
| Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7 |
| Red Hat* Enterprise Linux* 8.2, 64-bit | 3.6, 3.7 |
| CentOS* 7.4, 64-bit | 3.6, 3.7 |
| macOS* 10.15.x versions | 3.6, 3.7, 3.8 |
| Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or higher) editions | 3.6, 3.7, 3.8 |
| Windows Server* 2016 or higher | 3.6, 3.7, 3.8 |

> **NOTE**: This package can be installed on other versions of Linux and Windows OSes, but only the specific versions above are fully validated.

## Install the Runtime Package
@@ -43,7 +48,11 @@ python -m pip install --user virtualenv
python -m venv openvino_env --system-site-packages
```

Activate virtual environment:<br>
> **NOTE**: On Linux and macOS, you may need to type `python3` instead of `python`. You may also need to [install pip](https://pip.pypa.io/en/stable/installing/).

### Step 2. Activate Virtual Environment

On Linux and macOS:
```sh
source openvino_env/bin/activate
@@ -53,14 +62,14 @@ On Windows:
openvino_env\Scripts\activate
```

### Step 2. Set Up and Update pip to the Highest Version
### Step 3. Set Up and Update pip to the Highest Version

Run the command below:
```sh
python -m pip install --upgrade pip
```

### Step 3. Install the Package
### Step 4. Install the Package

Run the command below: <br>
@@ -68,7 +77,7 @@ Run the command below: <br>
pip install openvino
```

### Step 4. Verify that the Package is Installed
### Step 5. Verify that the Package is Installed

Run the command below:
```sh
@@ -9,9 +9,9 @@
**Detailed description**: For each element of the input tensor, the operation calculates the corresponding
element in the output tensor with the following formula:

\f[
HSigmoid(x) = \frac{min(max(x + 3, 0), 6)}{6}
\f]

The HSigmoid operation is introduced in the following [article](https://arxiv.org/pdf/1905.02244.pdf).
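
To make the formula concrete, here is a minimal NumPy sketch of the element-wise computation (the function name and test values are illustrative, not part of the specification):

```python
import numpy as np

def hsigmoid(x: np.ndarray) -> np.ndarray:
    # HSigmoid(x) = min(max(x + 3, 0), 6) / 6, applied element-wise
    return np.minimum(np.maximum(x + 3.0, 0.0), 6.0) / 6.0

x = np.array([-4.0, -1.0, 0.0, 2.5, 5.0])
print(hsigmoid(x))  # [0. 0.33333333 0.5 0.91666667 1.]
```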
193
docs/ops/detection/ExperimentalDetectronDetectionOutput_6.md
Normal file
@@ -0,0 +1,193 @@
## ExperimentalDetectronDetectionOutput <a name="ExperimentalDetectronDetectionOutput"></a> {#openvino_docs_ops_detection_ExperimentalDetectronDetectionOutput_6}

**Versioned name**: *ExperimentalDetectronDetectionOutput-6*

**Category**: Object detection

**Short description**: The *ExperimentalDetectronDetectionOutput* operation performs non-maximum suppression to generate
the detection output using information on location and score predictions.

**Detailed description**: The operation performs the following steps:

1. Applies deltas to box sizes [x<sub>1</sub>, y<sub>1</sub>, x<sub>2</sub>, y<sub>2</sub>] and takes coordinates of
refined boxes according to the formulas (see the sketch after this list):

    `x0_new = ctr_x + (dx - 0.5 * exp(min(d_log_w, max_delta_log_wh))) * box_w`

    `y0_new = ctr_y + (dy - 0.5 * exp(min(d_log_h, max_delta_log_wh))) * box_h`

    `x1_new = ctr_x + (dx + 0.5 * exp(min(d_log_w, max_delta_log_wh))) * box_w - 1.0`

    `y1_new = ctr_y + (dy + 0.5 * exp(min(d_log_h, max_delta_log_wh))) * box_h - 1.0`

    * `box_w` and `box_h` are width and height of a box, respectively:

    `box_w = x1 - x0 + 1.0`

    `box_h = y1 - y0 + 1.0`

    * `ctr_x` and `ctr_y` are the center location of a box:

    `ctr_x = x0 + 0.5f * box_w`

    `ctr_y = y0 + 0.5f * box_h`

    * `dx`, `dy`, `d_log_w` and `d_log_h` are deltas calculated according to the formulas below, and `deltas_tensor` is the
second input:

    `dx = deltas_tensor[roi_idx, 4 * class_idx + 0] / deltas_weights[0]`

    `dy = deltas_tensor[roi_idx, 4 * class_idx + 1] / deltas_weights[1]`

    `d_log_w = deltas_tensor[roi_idx, 4 * class_idx + 2] / deltas_weights[2]`

    `d_log_h = deltas_tensor[roi_idx, 4 * class_idx + 3] / deltas_weights[3]`

2. If *class_agnostic_box_regression* is `true`, removes predictions for background classes.
3. Clips boxes to the image.
4. Applies *score_threshold* on detection scores.
5. Applies non-maximum suppression class-wise with *nms_threshold* and returns *post_nms_count* or fewer detections per
class.
6. Returns *max_detections_per_image* detections if the total number of detections is more than *max_detections_per_image*;
otherwise, returns the total number of detections, and the rest of the output tensor is filled with undefined values.
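
As referenced in step 1, the following is a minimal NumPy sketch of the box decoding for one ROI and one class (names and structure are illustrative; this is not the reference implementation):

```python
import numpy as np

def decode_box(rois, deltas, deltas_weights, max_delta_log_wh, roi_idx, class_idx):
    """Apply one class's deltas to one ROI, following step 1 above."""
    x0, y0, x1, y1 = rois[roi_idx]
    box_w = x1 - x0 + 1.0
    box_h = y1 - y0 + 1.0
    ctr_x = x0 + 0.5 * box_w
    ctr_y = y0 + 0.5 * box_h

    dx = deltas[roi_idx, 4 * class_idx + 0] / deltas_weights[0]
    dy = deltas[roi_idx, 4 * class_idx + 1] / deltas_weights[1]
    d_log_w = deltas[roi_idx, 4 * class_idx + 2] / deltas_weights[2]
    d_log_h = deltas[roi_idx, 4 * class_idx + 3] / deltas_weights[3]

    # Clamp the size deltas before exponentiation, as the operation specifies.
    half_w = 0.5 * np.exp(min(d_log_w, max_delta_log_wh)) * box_w
    half_h = 0.5 * np.exp(min(d_log_h, max_delta_log_wh)) * box_h

    x0_new = ctr_x + dx * box_w - half_w
    y0_new = ctr_y + dy * box_h - half_h
    x1_new = ctr_x + dx * box_w + half_w - 1.0
    y1_new = ctr_y + dy * box_h + half_h - 1.0
    return np.array([x0_new, y0_new, x1_new, y1_new])
```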

**Attributes**:

* *score_threshold*

    * **Description**: The *score_threshold* attribute specifies a threshold to consider only detections whose score is
larger than the threshold.
    * **Range of values**: non-negative floating point number
    * **Type**: float
    * **Default value**: None
    * **Required**: *yes*

* *nms_threshold*

    * **Description**: The *nms_threshold* attribute specifies a threshold to be used in the NMS stage.
    * **Range of values**: non-negative floating point number
    * **Type**: float
    * **Default value**: None
    * **Required**: *yes*

* *num_classes*

    * **Description**: The *num_classes* attribute specifies the number of detected classes.
    * **Range of values**: non-negative integer number
    * **Type**: int
    * **Default value**: None
    * **Required**: *yes*

* *post_nms_count*

    * **Description**: The *post_nms_count* attribute specifies the maximal number of detections per class.
    * **Range of values**: non-negative integer number
    * **Type**: int
    * **Default value**: None
    * **Required**: *yes*

* *max_detections_per_image*

    * **Description**: The *max_detections_per_image* attribute specifies the maximal number of detections per image.
    * **Range of values**: non-negative integer number
    * **Type**: int
    * **Default value**: None
    * **Required**: *yes*

* *class_agnostic_box_regression*

    * **Description**: The *class_agnostic_box_regression* attribute is a flag that specifies whether to delete background
classes or not.
    * **Range of values**:
      * `true` means background classes should be deleted
      * `false` means background classes should not be deleted
    * **Type**: boolean
    * **Default value**: false
    * **Required**: *no*

* *max_delta_log_wh*

    * **Description**: The *max_delta_log_wh* attribute specifies the maximal delta of logarithms for width and height.
    * **Range of values**: floating point number
    * **Type**: float
    * **Default value**: None
    * **Required**: *yes*

* *deltas_weights*

    * **Description**: The *deltas_weights* attribute specifies weights for bounding box size deltas.
    * **Range of values**: a list of non-negative floating point numbers
    * **Type**: float[]
    * **Default value**: None
    * **Required**: *yes*

**Inputs**

* **1**: A 2D tensor of type *T* with input ROIs, with shape `[number_of_ROIs, 4]` providing the ROIs as 4-tuples:
[x<sub>1</sub>, y<sub>1</sub>, x<sub>2</sub>, y<sub>2</sub>]. The batch dimension of the first, second, and third inputs
should be the same. **Required.**

* **2**: A 2D tensor of type *T* with shape `[number_of_ROIs, num_classes * 4]` providing deltas for input boxes.
**Required.**

* **3**: A 2D tensor of type *T* with shape `[number_of_ROIs, num_classes]` providing detection scores. **Required.**

* **4**: A 2D tensor of type *T* with shape `[1, 3]` containing three elements
`[image_height, image_width, scale_height_and_width]` providing input image size info. **Required.**

**Outputs**

* **1**: A 2D tensor of type *T* with shape `[max_detections_per_image, 4]` providing boxes.

* **2**: A 1D tensor of type *T_IND* with shape `[max_detections_per_image]` providing class indices.

* **3**: A 1D tensor of type *T* with shape `[max_detections_per_image]` providing scores.

**Types**

* *T*: any supported floating point type.

* *T_IND*: `int64` or `int32`.

**Example**

```xml
<layer ... type="ExperimentalDetectronDetectionOutput" version="opset6">
    <data class_agnostic_box_regression="false" deltas_weights="10.0,10.0,5.0,5.0" max_delta_log_wh="4.135166645050049" max_detections_per_image="100" nms_threshold="0.5" num_classes="81" post_nms_count="2000" score_threshold="0.05000000074505806"/>
    <input>
        <port id="0">
            <dim>1000</dim>
            <dim>4</dim>
        </port>
        <port id="1">
            <dim>1000</dim>
            <dim>324</dim>
        </port>
        <port id="2">
            <dim>1000</dim>
            <dim>81</dim>
        </port>
        <port id="3">
            <dim>1</dim>
            <dim>3</dim>
        </port>
    </input>
    <output>
        <port id="4" precision="FP32">
            <dim>100</dim>
            <dim>4</dim>
        </port>
        <port id="5" precision="I32">
            <dim>100</dim>
        </port>
        <port id="6" precision="FP32">
            <dim>100</dim>
        </port>
        <port id="7" precision="I32">
            <dim>100</dim>
        </port>
    </output>
</layer>
```

112
docs/ops/detection/ExperimentalDetectronGenerateProposalsSingleImage_6.md
Normal file
@@ -0,0 +1,112 @@
## ExperimentalDetectronGenerateProposalsSingleImage <a name="ExperimentalDetectronGenerateProposalsSingleImage"></a> {#openvino_docs_ops_detection_ExperimentalDetectronGenerateProposalsSingleImage_6}

**Versioned name**: *ExperimentalDetectronGenerateProposalsSingleImage-6*

**Category**: Object detection

**Short description**: The *ExperimentalDetectronGenerateProposalsSingleImage* operation computes ROIs and their scores
based on input data.

**Detailed description**: The operation performs the following steps (a sketch of the selection stages follows this list):

1. Transposes and reshapes predicted bounding box deltas and scores to get them into the same order as the anchors.
2. Transforms anchors into proposals using deltas and clips proposals to the image.
3. Removes predicted boxes with either height or width < *min_size*.
4. Sorts all `(proposal, score)` pairs by score from highest to lowest; the order of pairs with equal scores is undefined.
5. Takes the top *pre_nms_count* proposals; if the total number of proposals is less than *pre_nms_count*, takes all proposals.
6. Applies non-maximum suppression with *nms_threshold*.
7. Takes the top *post_nms_count* proposals and returns these top proposals and their scores. If the total number of proposals
is less than *post_nms_count*, the remaining output tensor elements are filled with zeroes.
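
As a rough illustration of steps 3–7, here is a simplified NumPy sketch with a plain greedy IoU-based NMS (the names and the NMS details are illustrative assumptions, not the normative algorithm):

```python
import numpy as np

def iou(box, boxes):
    # Intersection-over-union of one box against an array of boxes (x0, y0, x1, y1).
    x0 = np.maximum(box[0], boxes[:, 0]); y0 = np.maximum(box[1], boxes[:, 1])
    x1 = np.minimum(box[2], boxes[:, 2]); y1 = np.minimum(box[3], boxes[:, 3])
    inter = np.clip(x1 - x0, 0, None) * np.clip(y1 - y0, 0, None)
    area = lambda b: (b[..., 2] - b[..., 0]) * (b[..., 3] - b[..., 1])
    return inter / (area(box) + area(boxes) - inter + 1e-9)

def select_proposals(proposals, scores, min_size, pre_nms_count, post_nms_count, nms_threshold):
    # Step 3: drop boxes that are too small.
    w = proposals[:, 2] - proposals[:, 0]
    h = proposals[:, 3] - proposals[:, 1]
    keep = (w >= min_size) & (h >= min_size)
    proposals, scores = proposals[keep], scores[keep]

    # Steps 4-5: sort by score descending, keep the top pre_nms_count.
    order = np.argsort(-scores)[:pre_nms_count]
    proposals, scores = proposals[order], scores[order]

    # Step 6: greedy NMS.
    kept = []
    while len(proposals) and len(kept) < post_nms_count:
        kept.append((proposals[0], scores[0]))
        mask = iou(proposals[0], proposals[1:]) <= nms_threshold
        proposals, scores = proposals[1:][mask], scores[1:][mask]

    # Step 7: pad outputs with zeroes up to post_nms_count.
    out_rois = np.zeros((post_nms_count, 4), dtype=np.float32)
    out_scores = np.zeros(post_nms_count, dtype=np.float32)
    for i, (roi, s) in enumerate(kept):
        out_rois[i], out_scores[i] = roi, s
    return out_rois, out_scores
```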

**Attributes**:

* *min_size*

    * **Description**: The *min_size* attribute specifies minimum box width and height.
    * **Range of values**: non-negative floating point number
    * **Type**: float
    * **Default value**: None
    * **Required**: *yes*

* *nms_threshold*

    * **Description**: The *nms_threshold* attribute specifies the threshold to be used in the NMS stage.
    * **Range of values**: non-negative floating point number
    * **Type**: float
    * **Default value**: None
    * **Required**: *yes*

* *pre_nms_count*

    * **Description**: The *pre_nms_count* attribute specifies the number of top-n proposals before NMS.
    * **Range of values**: non-negative integer number
    * **Type**: int
    * **Default value**: None
    * **Required**: *yes*

* *post_nms_count*

    * **Description**: The *post_nms_count* attribute specifies the number of top-n proposals after NMS.
    * **Range of values**: non-negative integer number
    * **Type**: int
    * **Default value**: None
    * **Required**: *yes*

**Inputs**

* **1**: A 1D tensor of type *T* with 3 elements `[image_height, image_width, scale_height_and_width]` providing input
image size info. **Required.**

* **2**: A 2D tensor of type *T* with shape `[height * width * number_of_channels, 4]` providing anchors. **Required.**

* **3**: A 3D tensor of type *T* with shape `[number_of_channels * 4, height, width]` providing deltas for anchors.
Height and width for the third and fourth inputs should be equal. **Required.**

* **4**: A 3D tensor of type *T* with shape `[number_of_channels, height, width]` providing proposal scores.
**Required.**

**Outputs**

* **1**: A 2D tensor of type *T* with shape `[post_nms_count, 4]` providing ROIs.

* **2**: A 1D tensor of type *T* with shape `[post_nms_count]` providing ROI scores.

**Types**

* *T*: any supported floating point type.

**Example**

```xml
<layer ... type="ExperimentalDetectronGenerateProposalsSingleImage" version="opset6">
    <data min_size="0.0" nms_threshold="0.699999988079071" post_nms_count="1000" pre_nms_count="1000"/>
    <input>
        <port id="0">
            <dim>3</dim>
        </port>
        <port id="1">
            <dim>12600</dim>
            <dim>4</dim>
        </port>
        <port id="2">
            <dim>12</dim>
            <dim>50</dim>
            <dim>84</dim>
        </port>
        <port id="3">
            <dim>3</dim>
            <dim>50</dim>
            <dim>84</dim>
        </port>
    </input>
    <output>
        <port id="4" precision="FP32">
            <dim>1000</dim>
            <dim>4</dim>
        </port>
        <port id="5" precision="FP32">
            <dim>1000</dim>
        </port>
    </output>
</layer>
```
116
docs/ops/detection/ExperimentalDetectronPriorGridGenerator_6.md
Normal file
@@ -0,0 +1,116 @@
## ExperimentalDetectronPriorGridGenerator <a name="ExperimentalDetectronPriorGridGenerator"></a> {#openvino_docs_ops_detection_ExperimentalDetectronPriorGridGenerator_6}

**Versioned name**: *ExperimentalDetectronPriorGridGenerator-6*

**Category**: Object detection

**Short description**: The *ExperimentalDetectronPriorGridGenerator* operation generates prior grids of specified sizes.

**Detailed description**: The operation takes coordinates of centres of boxes and adds strides with offset `0.5` to them to
calculate coordinates of prior grids.

The numbers of generated cells are `featmap_height` and `featmap_width` if *h* and *w* are zeroes; otherwise, *h* and *w*,
respectively. Steps of the generated grid are `image_height` / `layer_height` and `image_width` / `layer_width` if
*stride_y* and *stride_x* are zeroes; otherwise, *stride_y* and *stride_x*, respectively.

`featmap_height`, `featmap_width`, `image_height` and `image_width` are spatial dimension values taken from the second and third
inputs, respectively.
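
A minimal NumPy sketch of the grid generation in the flattened case (names are illustrative; it assumes the cell counts and strides have already been resolved to non-zero values):

```python
import numpy as np

def prior_grid(priors, grid_h, grid_w, stride_x, stride_y):
    # priors: [number_of_priors, 4]; returns [grid_h * grid_w * number_of_priors, 4]
    out = []
    for i in range(grid_h):
        for j in range(grid_w):
            # Cell centre with the 0.5 offset described above.
            cx = (j + 0.5) * stride_x
            cy = (i + 0.5) * stride_y
            out.append(priors + np.array([cx, cy, cx, cy]))
    return np.concatenate(out, axis=0)

priors = np.array([[-16.0, -16.0, 16.0, 16.0]])
print(prior_grid(priors, grid_h=2, grid_w=2, stride_x=32.0, stride_y=32.0))
```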

**Attributes**:

* *flatten*

    * **Description**: The *flatten* attribute specifies whether the output tensor should be 2D or 4D.
    * **Range of values**:
      * `true` - the output tensor should be a 2D tensor
      * `false` - the output tensor should be a 4D tensor
    * **Type**: boolean
    * **Default value**: true
    * **Required**: *no*

* *h*

    * **Description**: The *h* attribute specifies the number of cells of the generated grid with respect to height.
    * **Range of values**: non-negative integer number less than or equal to `featmap_height`
    * **Type**: int
    * **Default value**: 0
    * **Required**: *no*

* *w*

    * **Description**: The *w* attribute specifies the number of cells of the generated grid with respect to width.
    * **Range of values**: non-negative integer number less than or equal to `featmap_width`
    * **Type**: int
    * **Default value**: 0
    * **Required**: *no*

* *stride_x*

    * **Description**: The *stride_x* attribute specifies the step of the generated grid with respect to the x coordinate.
    * **Range of values**: non-negative float number
    * **Type**: float
    * **Default value**: 0.0
    * **Required**: *no*

* *stride_y*

    * **Description**: The *stride_y* attribute specifies the step of the generated grid with respect to the y coordinate.
    * **Range of values**: non-negative float number
    * **Type**: float
    * **Default value**: 0.0
    * **Required**: *no*

**Inputs**

* **1**: A 2D tensor of type *T* with shape `[number_of_priors, 4]` containing priors. **Required.**

* **2**: A 4D tensor of type *T* with input feature map `[1, number_of_channels, featmap_height, featmap_width]`. This
operation uses only the sizes of this input tensor, not its data. **Required.**

* **3**: A 4D tensor of type *T* with input image `[1, number_of_channels, image_height, image_width]`. The number of
channels of both feature map and input image tensors must match. This operation uses only the sizes of this input tensor,
not its data. **Required.**

**Outputs**

* **1**: A tensor of type *T* with the priors grid with shape `[featmap_height * featmap_width * number_of_priors, 4]`
if *flatten* is `true`, or `[featmap_height, featmap_width, number_of_priors, 4]` otherwise.
If 0 < *h* < `featmap_height` and/or 0 < *w* < `featmap_width`, the output data size is less than
`featmap_height` * `featmap_width` * `number_of_priors` * 4, and the rest of the output tensor is filled with undefined values.

**Types**

* *T*: any supported floating point type.

**Example**

```xml
<layer ... type="ExperimentalDetectronPriorGridGenerator" version="opset6">
    <data flatten="true" h="0" stride_x="32.0" stride_y="32.0" w="0"/>
    <input>
        <port id="0">
            <dim>3</dim>
            <dim>4</dim>
        </port>
        <port id="1">
            <dim>1</dim>
            <dim>256</dim>
            <dim>25</dim>
            <dim>42</dim>
        </port>
        <port id="2">
            <dim>1</dim>
            <dim>3</dim>
            <dim>800</dim>
            <dim>1344</dim>
        </port>
    </input>
    <output>
        <port id="3" precision="FP32">
            <dim>3150</dim>
            <dim>4</dim>
        </port>
    </output>
</layer>
```
139
docs/ops/detection/ExperimentalDetectronROIFeatureExtractor_6.md
Normal file
@@ -0,0 +1,139 @@
## ExperimentalDetectronROIFeatureExtractor <a name="ExperimentalDetectronROIFeatureExtractor"></a> {#openvino_docs_ops_detection_ExperimentalDetectronROIFeatureExtractor_6}

**Versioned name**: *ExperimentalDetectronROIFeatureExtractor-6*

**Category**: Object detection

**Short description**: *ExperimentalDetectronROIFeatureExtractor* is the [ROIAlign](ROIAlign_3.md) operation applied
over a feature pyramid.

**Detailed description**: *ExperimentalDetectronROIFeatureExtractor* maps input ROIs to the levels of the pyramid
depending on the sizes of ROIs and parameters of the operation, and then extracts features via ROIAlign from the
corresponding pyramid levels.

The operation applies the *ROIAlign* algorithm to the pyramid layers:

`output[i, :, :, :] = ROIAlign(inputPyramid[j], rois[i])`

`j = PyramidLevelMapper(rois[i])`

PyramidLevelMapper maps the ROI to the pyramid level using the following formula:

`j = floor(2 + log2(sqrt(w * h) / 224))`

Here 224 is the canonical ImageNet pre-training size, 2 is the pyramid starting level, and `w`, `h` are the ROI width and height.

For more details, please see the following source:
[Feature Pyramid Networks for Object Detection](https://arxiv.org/pdf/1612.03144.pdf).
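
A small Python sketch of the level mapping, with the result clamped to the available pyramid levels (the clamping bounds are an illustrative assumption):

```python
import math

def pyramid_level(roi, num_levels, canonical_size=224, starting_level=2):
    # roi: (x0, y0, x1, y1); returns a 0-based index into the pyramid inputs.
    x0, y0, x1, y1 = roi
    w, h = x1 - x0, y1 - y0
    j = math.floor(starting_level + math.log2(math.sqrt(w * h) / canonical_size))
    # Clamp to the levels that actually exist (assumed behaviour).
    return min(max(j - starting_level, 0), num_levels - 1)

print(pyramid_level((0.0, 0.0, 224.0, 224.0), num_levels=4))  # 0 -> the finest level
```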

**Attributes**:

* *output_size*

    * **Description**: The *output_size* attribute specifies the width and height of the output tensor.
    * **Range of values**: a positive integer number
    * **Type**: int
    * **Default value**: None
    * **Required**: *yes*

* *sampling_ratio*

    * **Description**: The *sampling_ratio* attribute specifies the number of sampling points per output value. If 0,
then the adaptive number computed as `ceil(roi_width / output_width)` is used, and likewise for height.
    * **Range of values**: a non-negative integer number
    * **Type**: int
    * **Default value**: None
    * **Required**: *yes*

* *pyramid_scales*

    * **Description**: The *pyramid_scales* attribute enlists `image_size / layer_size[l]` ratios for pyramid layers `l=1,...,L`,
where `L` is the number of pyramid layers, and `image_size` refers to the network's input image. Note that the pyramid's
largest layer may have a smaller size than the input image, e.g. `image_size` is `800 x 1344` in the XML example below.
    * **Range of values**: a list of positive integer numbers
    * **Type**: int[]
    * **Default value**: None
    * **Required**: *yes*

* *aligned*

    * **Description**: The *aligned* attribute specifies whether to add the offset (`-0.5`) to ROI sizes or not.
    * **Range of values**:
      * `true` - add the offset to ROI sizes
      * `false` - do not add the offset to ROI sizes
    * **Type**: boolean
    * **Default value**: false
    * **Required**: *no*

**Inputs**:

* **1**: 2D input tensor of type *T* with shape `[number_of_ROIs, 4]` providing the ROIs as 4-tuples:
[x<sub>1</sub>, y<sub>1</sub>, x<sub>2</sub>, y<sub>2</sub>]. Coordinates *x* and *y* refer to the network's input
*image_size*. **Required**.

* **2**, ..., **L**: Pyramid of 4D input tensors with feature maps. Shape must be
`[1, number_of_channels, layer_size[l], layer_size[l]]`. The number of channels must be the same for all layers of the
pyramid. The layer width and height must be equal to `layer_size[l] = image_size / pyramid_scales[l]`. **Required**.

**Outputs**:

* **1**: 4D output tensor of type *T* with ROI features. Shape must be
`[number_of_ROIs, number_of_channels, output_size, output_size]`. The number of channels is the same as for all images in the
input pyramid.

* **2**: 2D output tensor of type *T* with reordered ROIs according to their mapping to the pyramid levels. Shape
must be the same as for the first input: `[number_of_ROIs, 4]`.

**Types**

* *T*: any supported floating point type.

**Example**

```xml
<layer ... type="ExperimentalDetectronROIFeatureExtractor" version="opset6">
    <data aligned="false" output_size="7" pyramid_scales="4,8,16,32,64" sampling_ratio="2"/>
    <input>
        <port id="0">
            <dim>1000</dim>
            <dim>4</dim>
        </port>
        <port id="1">
            <dim>1</dim>
            <dim>256</dim>
            <dim>200</dim>
            <dim>336</dim>
        </port>
        <port id="2">
            <dim>1</dim>
            <dim>256</dim>
            <dim>100</dim>
            <dim>168</dim>
        </port>
        <port id="3">
            <dim>1</dim>
            <dim>256</dim>
            <dim>50</dim>
            <dim>84</dim>
        </port>
        <port id="4">
            <dim>1</dim>
            <dim>256</dim>
            <dim>25</dim>
            <dim>42</dim>
        </port>
    </input>
    <output>
        <port id="5" precision="FP32">
            <dim>1000</dim>
            <dim>256</dim>
            <dim>7</dim>
            <dim>7</dim>
        </port>
        <port id="6" precision="FP32">
            <dim>1000</dim>
            <dim>4</dim>
        </port>
    </output>
</layer>
```
@@ -50,6 +50,11 @@ declared in `namespace opset6`.
* [Equal](comparison/Equal_1.md)
* [Erf](arithmetic/Erf_1.md)
* [Exp](activation/Exp_1.md)
* [ExperimentalDetectronDetectionOutput_6](detection/ExperimentalDetectronDetectionOutput_6.md)
* [ExperimentalDetectronGenerateProposalsSingleImage_6](detection/ExperimentalDetectronGenerateProposalsSingleImage_6.md)
* [ExperimentalDetectronPriorGridGenerator_6](detection/ExperimentalDetectronPriorGridGenerator_6.md)
* [ExperimentalDetectronROIFeatureExtractor_6](detection/ExperimentalDetectronROIFeatureExtractor_6.md)
* [ExperimentalDetectronTopKROIs_6](sort/ExperimentalDetectronTopKROIs_6.md)
* [ExtractImagePatches](movement/ExtractImagePatches_3.md)
* [FakeQuantize](quantization/FakeQuantize_1.md)
* [Floor](arithmetic/Floor_1.md)
61
docs/ops/sort/ExperimentalDetectronTopKROIs_6.md
Normal file
@@ -0,0 +1,61 @@
## ExperimentalDetectronTopKROIs <a name="ExperimentalDetectronTopKROIs"></a> {#openvino_docs_ops_sort_ExperimentalDetectronTopKROIs_6}

**Versioned name**: *ExperimentalDetectronTopKROIs-6*

**Category**: Sort

**Short description**: The *ExperimentalDetectronTopKROIs* operation is a TopK operation applied to probabilities of input
ROIs.

**Detailed description**: The operation sorts input ROIs by probability in descending order and returns *max_rois*
ROIs. The order of sorted ROIs with equal probabilities is undefined. If the number of ROIs is less than *max_rois*,
the operation returns all ROIs sorted in descending order, and the rest of the output tensor elements are filled with
undefined values.
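
In NumPy terms, the selection is essentially a one-line top-k; a minimal sketch (illustrative names, without padding of the undefined tail):

```python
import numpy as np

def top_k_rois(rois, probs, max_rois):
    # Sort descending by probability and keep at most max_rois ROIs.
    order = np.argsort(-probs)[:max_rois]
    return rois[order]

rois = np.array([[0, 0, 10, 10], [5, 5, 20, 20], [1, 1, 4, 4]], dtype=np.float32)
probs = np.array([0.2, 0.9, 0.5], dtype=np.float32)
print(top_k_rois(rois, probs, max_rois=2))  # the two highest-probability ROIs
```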

**Attributes**:

* *max_rois*

    * **Description**: The *max_rois* attribute specifies the maximal number of output ROIs.
    * **Range of values**: non-negative integer number
    * **Type**: int
    * **Default value**: 0
    * **Required**: *no*

**Inputs**

* **1**: A 2D tensor of type *T* with shape `[number_of_ROIs, 4]` describing the ROIs as 4-tuples:
[x<sub>1</sub>, y<sub>1</sub>, x<sub>2</sub>, y<sub>2</sub>]. **Required.**

* **2**: A 1D tensor of type *T* with shape `[number_of_input_ROIs]` containing probabilities for input ROIs. **Required.**

**Outputs**

* **1**: A 2D tensor of type *T* with shape `[max_rois, 4]` describing *max_rois* ROIs with the highest probabilities.

**Types**

* *T*: any supported floating point type.

**Example**

```xml
<layer ... type="ExperimentalDetectronTopKROIs" version="opset6">
    <data max_rois="1000"/>
    <input>
        <port id="0">
            <dim>5000</dim>
            <dim>4</dim>
        </port>
        <port id="1">
            <dim>5000</dim>
        </port>
    </input>
    <output>
        <port id="2" precision="FP32">
            <dim>1000</dim>
            <dim>4</dim>
        </port>
    </output>
</layer>
```
@@ -13,11 +13,11 @@ Deep Learning Inference Engine is a part of Intel® Deep Learning Deployment
Below are the three main steps of the deployment process:

1. **Conversion**<br>
   Trained models are converted from a specific framework (like Caffe\* or TensorFlow\*) to a framework-agnostic Intermediate Representation (IR) format.
   Trained models are converted from a specific framework, like TensorFlow\*, or format, like ONNX\*, to the framework-agnostic Intermediate Representation (IR) format.

   - *Performance flow*: This is an offline step where general topology-level optimizations happen automatically (see <a href="#mo-knobs-related-to-performance">Model Optimizer Knobs Related to Performance</a>).

   - *Tools*: Intel DL Deployment Toolkit features the Model Optimizer that enables automatic and seamless transition from the training environment to the deployment environment.
   - *Tools*: OpenVINO™ features the Model Optimizer that enables automatic and seamless transition from a training to deployment environment.

2. **Model Inference/Execution**<br>
   After conversion, Inference Engine consumes the IR to perform inference. While Inference Engine API itself is target-agnostic, internally, it has a notion of plugins, which are device-specific libraries facilitating the hardware-assisted acceleration.
@@ -55,14 +55,16 @@ In contrast, for the latency-oriented tasks, the time to a single frame is more
|
||||
|
||||
Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample, which allows latency vs. throughput measuring.
|
||||
|
||||
> **NOTE**: Most samples also support batching (automatically packing multiple input images into a single request). However, high batch size results in a latency penalty. So for more real-time oriented usages, lower batch sizes (as low as a single input) are usually used. However, devices like CPU, Intel® Movidius™ Myriad™ 2 VPU, Intel® Movidius™ Myriad™ X VPU, or Intel® Vision Accelerator Design with Intel® Movidius™ VPU require a number of parallel requests instead of batching to leverage the performance.
|
||||
> **NOTE**: The [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample also supports batching, that is automatically packing multiple input images into a single request. However, high batch size results in a latency penalty. So for more real-time oriented usages, batch sizes that are as low as a single input are usually used. Still, devices like CPU, Intel®Movidius™ Myriad™ 2 VPU, Intel® Movidius™ Myriad™ X VPU, or Intel® Vision Accelerator Design with Intel® Movidius™ VPU require a number of parallel requests instead of batching to leverage the performance. Running multiple requests should be coupled with a device configured to the corresponding number of streams. See <a href="#cpu-streams">details on CPU streams</a> for an example.
|
||||
|
||||
[OpenVINO™ Deep Learning Workbench tool](https://docs.openvinotoolkit.org/latest/workbench_docs_Workbench_DG_Introduction.html) provides throughput versus latency charts for different numbers of streams, requests, and batch sizes to find the performance sweet spot.
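
A minimal sketch of coupling CPU streams with the same number of parallel requests through the Inference Engine C++ API (the stream count and model path here are illustrative assumptions, not prescribed values):

```cpp
#include <inference_engine.hpp>
#include <vector>

int main() {
    InferenceEngine::Core ie;
    // Ask the CPU plugin for 4 throughput streams (a made-up value;
    // passing "CPU_THROUGHPUT_AUTO" lets the plugin choose instead).
    ie.SetConfig({{CONFIG_KEY(CPU_THROUGHPUT_STREAMS), "4"}}, "CPU");

    auto network = ie.ReadNetwork("model.xml");        // placeholder path
    auto executable = ie.LoadNetwork(network, "CPU");

    // Couple the stream count with the same number of parallel requests.
    std::vector<InferenceEngine::InferRequest> requests;
    for (int i = 0; i < 4; ++i) {
        requests.push_back(executable.CreateInferRequest());
    }
    return 0;
}
```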

### Comparing Performance with Native/Framework Code <a name="comparing-performance-with-native-framework-code"></a>

When comparing the Inference Engine performance with the framework or another reference code, make sure that both versions are as similar as possible:

- Wrap exactly the inference execution (refer to the [Inference Engine Samples](../IE_DG/Samples_Overview.md) for examples).
- Do not include model loading time.
- Wrap exactly the inference execution (refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) sample for an example, and see the timing sketch after this list).
- Track model loading time separately.
- Ensure the inputs are identical for the Inference Engine and the framework. For example, Caffe\* allows you to auto-populate the input with random values. Note that this might give different performance than real images.
- Similarly, for a correct performance comparison, make sure the access pattern, for example, input layouts, is optimal for the Inference Engine (currently, it is NCHW).
- Any user-side pre-processing should be tracked separately.
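
A minimal sketch of timing exactly the inference call and nothing else (paths and device are placeholders; this is an illustration, not the Benchmark App's implementation):

```cpp
#include <chrono>
#include <iostream>
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;
    auto network = ie.ReadNetwork("model.xml");    // model loading is *not* timed
    auto executable = ie.LoadNetwork(network, "CPU");
    auto request = executable.CreateInferRequest();

    const auto start = std::chrono::steady_clock::now();
    request.Infer();                               // wrap exactly the inference execution
    const auto stop = std::chrono::steady_clock::now();

    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(stop - start).count()
              << " ms\n";
    return 0;
}
```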

@@ -77,7 +79,7 @@ You need to build your performance conclusions on reproducible data. Do the perf

- If the warm-up run does not help or execution time still varies, you can try running a large number of iterations and then average the results.
- For time values that vary widely, use the geometric mean (geomean); a small sketch follows this list.
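
A sketch of computing the geomean of per-iteration latencies (the sample values are made up):

```cpp
#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
    // Hypothetical per-iteration latencies in milliseconds.
    std::vector<double> latencies_ms = {10.2, 9.8, 11.5, 10.0};
    // Geomean = exp(mean of logs); less sensitive to occasional outliers.
    const double log_sum = std::accumulate(
        latencies_ms.begin(), latencies_ms.end(), 0.0,
        [](double acc, double v) { return acc + std::log(v); });
    std::cout << "geomean: " << std::exp(log_sum / latencies_ms.size()) << " ms\n";
    return 0;
}
```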

Refer to the [Inference Engine Samples](../IE_DG/Samples_Overview.md) for code examples for the performance measurements. Almost every sample, except interactive demos, has a `-ni` option to specify the number of iterations.
Refer to the [Benchmark App](../../inference-engine/samples/benchmark_app/README.md) for code examples of performance measurements. Almost every sample, except interactive demos, has the `-ni` option to specify the number of iterations.

## Model Optimizer Knobs Related to Performance <a name="mo-knobs-related-to-performance"></a>

@@ -443,7 +445,7 @@ There are important performance caveats though: for example, the tasks that run

Also, if the inference is performed on the graphics processing unit (GPU), there is little gain in encoding, for instance, the resulting video on the same GPU in parallel, because the device is already busy.

Refer to the [Object Detection SSD Demo](@ref omz_demos_object_detection_demo_ssd_async_README) (latency-oriented Async API showcase) and [Benchmark App Sample](../../inference-engine/samples/benchmark_app/README.md) (which has both latency and throughput-oriented modes) for complete examples of the Async API in action.
Refer to the [Object Detection SSD Demo](@ref omz_demos_object_detection_demo_cpp) (latency-oriented Async API showcase) and [Benchmark App Sample](../../inference-engine/samples/benchmark_app/README.md) (which has both latency and throughput-oriented modes) for complete examples of the Async API in action.
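
For orientation, a minimal sketch of the Async API pattern those samples build on (model path and the overlapped work are placeholders):

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;
    auto network = ie.ReadNetwork("model.xml");    // placeholder path
    auto executable = ie.LoadNetwork(network, "CPU");
    auto request = executable.CreateInferRequest();

    request.StartAsync();  // kick off inference without blocking this thread
    // ... overlap other work here, e.g., decoding/pre-processing the next frame ...
    request.Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY);
    return 0;
}
```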

## Using Tools <a name="using-tools"></a>

@@ -20,7 +20,7 @@ The OpenVINO™ Security Add-on consists of three components that run in Kernel-

- The Model Developer generates an access controlled model from the OpenVINO™ toolkit output. The access controlled model uses the model's Intermediate Representation (IR) files to create an access controlled output file archive that is distributed to Model Users. The Developer can also put the archive file in long-term storage or back it up without additional security.

- The Model Developer uses the OpenVINO™ Security Add-on Tool(`ovsatool`) to generate and manage cryptographic keys and related collateral for the access controlled models. Cryptographic material is only available in a virtual machine (VM) environment. The OpenVINO™ Security Add-on key management system lets the Model Developer use external Certificate Authorities to generate certificates to add to a key-store.
- The Model Developer uses the OpenVINO™ Security Add-on Tool (<code>ovsatool</code>) to generate and manage cryptographic keys and related collateral for the access controlled models. Cryptographic material is only available in a virtual machine (VM) environment. The OpenVINO™ Security Add-on key management system lets the Model Developer use external Certificate Authorities to generate certificates to add to a key-store.

- The Model Developer generates user-specific licenses in a JSON format file for the access controlled model. The Model Developer can define global or user-specific licenses and attach licensing policies to the licenses. For example, the Model Developer can add a time limit for a model or limit the number of times a user can run a model.

@@ -31,7 +31,7 @@ The OpenVINO™ Security Add-on consists of three components that run in Kernel-

- The Independent Software Vendor hosts the OpenVINO™ Security Add-on License Service, which responds to license validation requests when a user attempts to load an access controlled model in a model server. The licenses are registered with the OpenVINO™ Security Add-on License Service.

- When a user loads the model, the OpenVINO™ Security Add-on Runtime contacts the License Service to make sure the license is valid and within the parameters that the Model Developer defined with the OpenVINO™ Security Add-on Tool(`ovsatool`). The user must be able to reach the Independent Software Vendor's License Service over the Internet.
- When a user loads the model, the OpenVINO™ Security Add-on Runtime contacts the License Service to make sure the license is valid and within the parameters that the Model Developer defined with the OpenVINO™ Security Add-on Tool (<code>ovsatool</code>). The user must be able to reach the Independent Software Vendor's License Service over the Internet.

</details>
@@ -51,6 +51,8 @@ After the license is successfully validated, the OpenVINO™ Model Server loads



The binding between SWTPM (the vTPM used in the guest VM) and the HW TPM (the TPM on the host) is explained in [this document](https://github.com/openvinotoolkit/security_addon/blob/release_2021_3/docs/fingerprint-changes.md).

## About the Installation
The Model Developer, Independent Software Vendor, and User each must prepare one physical hardware machine and one Kernel-based Virtual Machine (KVM). In addition, each person must prepare a Guest Virtual Machine (Guest VM) for each role that person plays.

@@ -248,8 +250,12 @@ See the QEMU documentation for more information about the QEMU network configura

Networking is set up on the Host Machine. Continue to Step 3 to prepare a Guest VM for the combined role of Model Developer and Independent Software Vendor.

### Step 3: Set Up one Guest VM for the combined roles of Model Developer and Independent Software Vendor<a name="dev-isv-vm"></a>
### Step 3: Clone the OpenVINO™ Security Add-on

Download the [OpenVINO™ Security Add-on](https://github.com/openvinotoolkit/security_addon).

### Step 4: Set Up one Guest VM for the combined roles of Model Developer and Independent Software Vendor<a name="dev-isv-vm"></a>

For each separate role you play, you must prepare a virtual machine, called a Guest VM. Because the Model Developer and Independent Software Vendor roles are combined in this release, these instructions guide you to set up one Guest VM, named `ovsa_isv`.

@@ -299,15 +305,28 @@ As an option, you can use `virsh` and the virtual machine manager to create and

Installation information is at https://github.com/tpm2-software/tpm2-tools/blob/master/INSTALL.md
4. Install the [Docker packages](https://docs.docker.com/engine/install/ubuntu/)
5. Shut down the Guest VM.<br>
9. On the host, create a directory to support the virtual TPM device. Only `root` should have read/write permission to this directory:
9. On the host, create a directory to support the virtual TPM device and provision its certificates. Only `root` should have read/write permission to this directory:
```sh
sudo mkdir -p /var/OVSA/
sudo mkdir /var/OVSA/vtpm
sudo mkdir /var/OVSA/vtpm/vtpm_isv_dev

export XDG_CONFIG_HOME=~/.config
/usr/share/swtpm/swtpm-create-user-config-files
swtpm_setup --tpmstate /var/OVSA/vtpm/vtpm_isv_dev --create-ek-cert --create-platform-cert --overwrite --tpm2 --pcr-banks -
```
**NOTE**: For steps 10 and 11, you can copy and edit the script named `start_ovsa_isv_dev_vm.sh` in the `Scripts/reference` directory in the OpenVINO™ Security Add-on repository instead of manually running the commands. If using the script, select the script with `isv` in the file name regardless of whether you are playing the role of the Model Developer or the role of the Independent Software Vendor. Edit the script to point to the correct directory locations and increment `vnc` for each Guest VM.
10. Start the vTPM on Host:
10. Start the vTPM on the Host, write the HW TPM data into its NVRAM, and restart the vTPM for QEMU:
```sh
sudo swtpm socket --tpm2 --server port=8280 \
--ctrl type=tcp,port=8281 \
--flags not-need-init --tpmstate dir=/var/OVSA/vtpm/vtpm_isv_dev &

sudo tpm2_startup --clear -T swtpm:port=8280
sudo tpm2_startup -T swtpm:port=8280
python3 <path to Security-Addon source>/Scripts/host/OVSA_write_hwquote_swtpm_nvram.py 8280
sudo pkill -f vtpm_isv_dev

swtpm socket --tpmstate dir=/var/OVSA/vtpm/vtpm_isv_dev \
--tpm2 \
--ctrl type=unixio,path=/var/OVSA/vtpm/vtpm_isv_dev/swtpm-sock \
```
@@ -335,9 +354,9 @@ As an option, you can use `virsh` and the virtual machine manager to create and

12. Use a VNC client to log on to the Guest VM at `<host-ip-address>:1`

### Step 4: Set Up one Guest VM for the User role
### Step 5: Set Up one Guest VM for the User role

1. Choose ONE of these options to create a Guest VM for the User role:<br>
1. Choose **ONE** of these options to create a Guest VM for the User role:<br>
**Option 1: Copy and Rename the `ovsa_isv_dev_vm_disk.qcow2` disk image**
1. Copy the `ovsa_isv_dev_vm_disk.qcow2` disk image to a new image named `ovsa_runtime_vm_disk.qcow2`. You created the `ovsa_isv_dev_vm_disk.qcow2` disk image in <a href="#prerequisites">Step 3</a>.
2. Boot the new image.
@@ -383,7 +402,7 @@ As an option, you can use `virsh` and the virtual machine manager to create and

-netdev tap,id=hostnet1,script=<path-to-scripts>/virbr0-qemu-ifup,downscript=<path-to-scripts>/virbr0-qemu-ifdown \
-vnc :2
```
7. Choose ONE of these options to install additional required software:
7. Choose **ONE** of these options to install additional required software:

**Option 1: Use a script to install additional software**
1. Copy the script `install_guest_deps.sh` from the `Scripts/reference` directory of the OVSA repository to the Guest VM

@@ -400,19 +419,32 @@ As an option, you can use `virsh` and the virtual machine manager to create and

4. Install the [Docker packages](https://docs.docker.com/engine/install/ubuntu/)
5. Shut down the Guest VM.<br><br>

2. Create a directory to support the virtual TPM device. Only `root` should have read/write permission to this directory:
2. Create a directory to support the virtual TPM device and provision its certificates. Only `root` should have read/write permission to this directory:
```sh
sudo mkdir /var/OVSA/vtpm/vtpm_runtime

export XDG_CONFIG_HOME=~/.config
/usr/share/swtpm/swtpm-create-user-config-files
swtpm_setup --tpmstate /var/OVSA/vtpm/vtpm_runtime --create-ek-cert --create-platform-cert --overwrite --tpm2 --pcr-banks -
```
**NOTE**: For steps 3 and 4, you can copy and edit the script named `start_ovsa_runtime_vm.sh` in the scripts directory in the OpenVINO™ Security Add-on repository instead of manually running the commands. Edit the script to point to the correct directory locations and increment `vnc` for each Guest VM. This means that if you are creating a third Guest VM on the same Host Machine, change `-vnc :2` to `-vnc :3`
3. Start the vTPM:
**NOTE**: For steps 3 and 4, you can copy and edit the script named `start_ovsa_runtime_vm.sh` in the `Scripts/reference` directory in the OpenVINO™ Security Add-on repository instead of manually running the commands. Edit the script to point to the correct directory locations and increment `vnc` for each Guest VM. This means that if you are creating a third Guest VM on the same Host Machine, change `-vnc :2` to `-vnc :3`
3. Start the vTPM, write the HW TPM data into its NVRAM, and restart the vTPM for QEMU:
```sh
sudo swtpm socket --tpm2 --server port=8380 \
--ctrl type=tcp,port=8381 \
--flags not-need-init --tpmstate dir=/var/OVSA/vtpm/vtpm_runtime &

sudo tpm2_startup --clear -T swtpm:port=8380
sudo tpm2_startup -T swtpm:port=8380
python3 <path to Security-Addon source>/Scripts/host/OVSA_write_hwquote_swtpm_nvram.py 8380
sudo pkill -f vtpm_runtime

swtpm socket --tpmstate dir=/var/OVSA/vtpm/vtpm_runtime \
--tpm2 \
--ctrl type=unixio,path=/var/OVSA/vtpm/vtpm_runtime/swtpm-sock \
--log level=20
```
4. Start the Guest VM in a new terminal. To do so, either copy and edit the script named `start_ovsa_runtime_vm.sh` in the scripts directory in the OpenVINO™ Security Add-on repository or manually run the command:
4. Start the Guest VM in a new terminal:
```sh
sudo qemu-system-x86_64 \
-cpu host \
```

@@ -450,13 +482,11 @@ Building OpenVINO™ Security Add-on depends on OpenVINO™ Model Server docker

This step is for the combined role of Model Developer and Independent Software Vendor, and for the User.

1. Download the [OpenVINO™ Security Add-on](https://github.com/openvinotoolkit/security_addon)

2. Go to the top-level OpenVINO™ Security Add-on source directory.
1. Go to the top-level OpenVINO™ Security Add-on source directory cloned earlier.
```sh
cd security_addon
```
3. Build the OpenVINO™ Security Add-on:
2. Build the OpenVINO™ Security Add-on:
```sh
make clean all
sudo make package
```

@@ -559,7 +589,7 @@ The Model Hosting components install the OpenVINO™ Security Add-on Runtime Doc

This section requires interactions between the Model Developer/Independent Software Vendor and the User. All roles must complete all applicable <a href="#setup-host">setup steps</a> and <a href="#ovsa-install">installation steps</a> before beginning this section.

This document uses the [face-detection-retail-0004](@ref omz_models_intel_face_detection_retail_0004_description_face_detection_retail_0004) model as an example.
This document uses the [face-detection-retail-0004](@ref omz_models_model_face_detection_retail_0044) model as an example.

The following figure describes the interactions between the Model Developer, Independent Software Vendor, and User.

@@ -577,7 +607,7 @@ The Model Developer creates model, defines access control and creates the user l

```sh
sudo -s
cd /<username-home-directory>/OVSA/artefacts
export OVSA_RUNTIME_ARTEFACTS=$PWD
export OVSA_DEV_ARTEFACTS=$PWD
source /opt/ovsa/scripts/setupvars.sh
```
2. Create files to request a certificate:<br>

@@ -622,7 +652,7 @@ This example uses `curl` to download the `face-detection-retail-0004` model from

```
3. Define and enable the model access control and master license:
```sh
/opt/ovsa/bin/ovsatool protect -i model/face-detection-retail-0004.xml model/face-detection-retail-0004.bin -n "face detection" -d "face detection retail" -v 0004 -p face_detection_model.dat -m face_detection_model.masterlic -k isv_keystore -g <output-of-uuidgen>
/opt/ovsa/bin/ovsatool controlAccess -i model/face-detection-retail-0004.xml model/face-detection-retail-0004.bin -n "face detection" -d "face detection retail" -v 0004 -p face_detection_model.dat -m face_detection_model.masterlic -k isv_keystore -g <output-of-uuidgen>
```
The Intermediate Representation files for the `face-detection-retail-0004` model are encrypted as `face_detection_model.dat` and a master license is generated as `face_detection_model.masterlic`.

@@ -703,6 +733,7 @@ This example uses scp to share data between the ovsa_runtime and ovsa_dev Guest

cd $OVSA_RUNTIME_ARTEFACTS
scp custkeystore.csr.crt username@<developer-vm-ip-address>:/<username-home-directory>/OVSA/artefacts
```

#### Step 3: Receive and load the access controlled model into the OpenVINO™ Model Server
1. Receive the model as files named
* `face_detection_model.dat`

@@ -736,14 +767,15 @@ This example uses scp to share data between the ovsa_runtime and ovsa_dev Guest

"model_config_list":[
    {
        "config":{
            "name":"protected-model",
            "name":"controlled-access-model",
            "base_path":"/sampleloader/model/fd",
            "custom_loader_options": {"loader_name": "ovsa", "keystore": "custkeystore", "protected_file": "face_detection_model"}
            "custom_loader_options": {"loader_name": "ovsa", "keystore": "custkeystore", "controlled_access_file": "face_detection_model"}
        }
    }
]
}
```

#### Step 4: Start the NGINX Model Server
The NGINX Model Server publishes the access controlled model.

@@ -773,11 +805,12 @@ For information about the NGINX interface, see https://github.com/openvinotoolki

```sh
curl --create-dirs https://raw.githubusercontent.com/openvinotoolkit/model_server/master/example_client/images/people/people1.jpeg -o images/people1.jpeg
```

#### Step 6: Run Inference

Run the `face_detection.py` script:
```sh
python3 face_detection.py --grpc_port 3335 --batch_size 1 --width 300 --height 300 --input_images_dir images --output_dir results --tls --server_cert server.pem --client_cert client.pem --client_key client.key --model_name protected-model
python3 face_detection.py --grpc_port 3335 --batch_size 1 --width 300 --height 300 --input_images_dir images --output_dir results --tls --server_cert server.pem --client_cert client.pem --client_key client.key --model_name controlled-access-model
```

## Summary

@@ -8,14 +8,14 @@

## Demos

- [Demos](@ref omz_demos_README)
- [Demos](@ref omz_demos)

## Additional Tools

- A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker_README), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader_README) and others
- A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader) and others

## Pre-Trained Models

- [Intel's Pre-trained Models from Open Model Zoo](@ref omz_models_intel_index)
- [Public Pre-trained Models Available with OpenVINO™ from Open Model Zoo](@ref omz_models_public_index)
- [Intel's Pre-trained Models from Open Model Zoo](@ref omz_models_group_intel)
- [Public Pre-trained Models Available with OpenVINO™ from Open Model Zoo](@ref omz_models_group_public)
docs/snippets/InferenceEngine_network_with_state_infer.cpp (new file, 109 lines)
@@ -0,0 +1,109 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <cstring>
#include <iostream>
#include <vector>
#include <inference_engine.hpp>

using namespace InferenceEngine;

int main(int argc, char *argv[]) {
    try {
        // --------------------------- 1. Load inference engine -------------------------------------
        std::cout << "Loading Inference Engine" << std::endl;
        Core ie;

        // 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format
        std::cout << "Loading network files" << std::endl;
        CNNNetwork network;
        network = ie.ReadNetwork(std::string("c:\\work\\git\\github_dldt3\\openvino\\model-optimizer\\summator.xml"));
        network.setBatchSize(1);

        // 3. Load network to CPU
        ExecutableNetwork executableNet = ie.LoadNetwork(network, "CPU");
        // 4. Create Infer Request
        InferRequest inferRequest = executableNet.CreateInferRequest();

        // 5. Prepare inputs
        ConstInputsDataMap cInputInfo = executableNet.GetInputsInfo();
        std::vector<Blob::Ptr> ptrInputBlobs;
        for (const auto& input : cInputInfo) {
            ptrInputBlobs.push_back(inferRequest.GetBlob(input.first));
        }
        InputsDataMap inputInfo;
        inputInfo = network.getInputsInfo();
        for (auto &item : inputInfo) {
            Precision inputPrecision = Precision::FP32;
            item.second->setPrecision(inputPrecision);
        }

        // 6. Prepare outputs
        std::vector<Blob::Ptr> ptrOutputBlobs;
        ConstOutputsDataMap cOutputInfo = executableNet.GetOutputsInfo();
        for (const auto& output : cOutputInfo) {
            ptrOutputBlobs.push_back(inferRequest.GetBlob(output.first));
        }

        // 7. Initialize memory state before starting
        for (auto &&state : inferRequest.QueryState()) {
            state.Reset();
        }

        //! [part1]
        // input data
        std::vector<float> data = {1, 2, 3, 4, 5, 6};
        // infer the first utterance
        for (size_t next_input = 0; next_input < data.size() / 2; next_input++) {
            MemoryBlob::Ptr minput = as<MemoryBlob>(ptrInputBlobs[0]);
            auto minputHolder = minput->wmap();

            // copy one float element per inference step into the input blob
            std::memcpy(minputHolder.as<void *>(),
                        &data[next_input],
                        sizeof(float));

            inferRequest.Infer();
            // check states
            auto states = inferRequest.QueryState();
            auto mstate = as<MemoryBlob>(states[0].GetState());
            auto state_buf = mstate->rmap();
            float *state = state_buf.as<float *>();
            std::cout << state[0] << "\n";
        }

        // resetting state between utterances
        std::cout << "Reset state\n";
        for (auto &&state : inferRequest.QueryState()) {
            state.Reset();
        }

        // infer the second utterance
        for (size_t next_input = data.size() / 2; next_input < data.size(); next_input++) {
            MemoryBlob::Ptr minput = as<MemoryBlob>(ptrInputBlobs[0]);
            auto minputHolder = minput->wmap();

            std::memcpy(minputHolder.as<void *>(),
                        &data[next_input],
                        sizeof(float));

            inferRequest.Infer();
            // check states
            auto states = inferRequest.QueryState();
            auto mstate = as<MemoryBlob>(states[0].GetState());
            auto state_buf = mstate->rmap();
            float *state = state_buf.as<float *>();
            std::cout << state[0] << "\n";
        }
        //! [part1]
    }
    catch (const std::exception &error) {
        std::cerr << error.what() << std::endl;
        return 1;
    }
    catch (...) {
        std::cerr << "Unknown/internal exception happened" << std::endl;
        return 1;
    }

    std::cerr << "Execution successful" << std::endl;
    return 0;
}
@@ -5,9 +5,9 @@

using namespace TemplateExtension;

constexpr ngraph::NodeTypeInfo Operation::type_info;

//! [op:ctor]
NGRAPH_RTTI_DEFINITION(TemplateExtension::Operation, "Template", 0);

Operation::Operation(const ngraph::Output<ngraph::Node> &arg, int64_t add) : Op({arg}), add(add) {
    constructor_validate_and_infer_types();
}

@@ -11,8 +11,7 @@ namespace TemplateExtension {

class Operation : public ngraph::op::Op {
public:
    static constexpr ngraph::NodeTypeInfo type_info{"Template", 0};
    const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; }
    NGRAPH_RTTI_DECLARATION;

    Operation() = default;
    Operation(const ngraph::Output<ngraph::Node>& arg, int64_t add);

@@ -186,9 +186,9 @@ endif ()

if (ENABLE_OPENCV)
    reset_deps_cache(OpenCV_DIR)

    set(OPENCV_VERSION "4.5.1")
    set(OPENCV_BUILD "044")
    set(OPENCV_BUILD_YOCTO "337")
    set(OPENCV_VERSION "4.5.2")
    set(OPENCV_BUILD "076")
    set(OPENCV_BUILD_YOCTO "708")

    if (AARCH64)
        if(DEFINED ENV{THIRDPARTY_SERVER_PATH})

@@ -208,7 +208,7 @@ if (ENABLE_OPENCV)

    TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_${OPENCV_SUFFIX}/opencv"
    ENVIRONMENT "OpenCV_DIR"
    VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*"
    SHA256 "b5239e0e50b9009f95a29cb11f0840ec085fa07f6c4d3349adf090f1e51b0787")
    SHA256 "ee3e5255f381b8de5e6fffe4e43dae8c99035377d0380f9183bd7341f1d0f204")

    unset(IE_PATH_TO_DEPS)
endif()

@@ -219,37 +219,37 @@ if (ENABLE_OPENCV)

    TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}/opencv"
    ENVIRONMENT "OpenCV_DIR"
    VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*"
    SHA256 "5250bfe5860c15eb1b31963c78804ee9b301a19d8d6e920c06ef41de681cb99e")
    SHA256 "a14f872e6b63b6ac12c7ff47fa49e578d14c14433b57f5d85ab5dd48a079938c")
elseif(APPLE AND X86_64)
    RESOLVE_DEPENDENCY(OPENCV
        ARCHIVE_MAC "opencv/opencv_${OPENCV_VERSION}-${OPENCV_BUILD}_osx.txz"
        TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_osx/opencv"
        ENVIRONMENT "OpenCV_DIR"
        VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*"
        SHA256 "f3ebc5cc72c86106c30cc711ac689e02281556bb43c09a89cd45cb99b6bef9a8")
        SHA256 "3e162f96e86cba8836618134831d9cf76df0438778b3e27e261dedad9254c514")
elseif(LINUX)
    if (AARCH64)
        set(OPENCV_SUFFIX "yocto_kmb")
        set(OPENCV_BUILD "${OPENCV_BUILD_YOCTO}")
    elseif (ARM)
        set(OPENCV_SUFFIX "debian9arm")
        set(OPENCV_HASH "0e787d6738092993bc92bb55975f52caabae45dc73473b5196d15e65e87d6b9d")
        set(OPENCV_HASH "4274f8c40b17215f4049096b524e4a330519f3e76813c5a3639b69c48633d34e")
    elseif ((LINUX_OS_NAME STREQUAL "CentOS 7" OR
             CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") AND X86_64)
        set(OPENCV_SUFFIX "centos7")
        set(OPENCV_HASH "9b813af064d463b31fa1603b11b6559532a031d59bb0782d234380955fd397e0")
        set(OPENCV_HASH "5fa76985c84fe7c64531682ef0b272510c51ac0d0565622514edf1c88b33404a")
    elseif (LINUX_OS_NAME MATCHES "CentOS 8" AND X86_64)
        set(OPENCV_SUFFIX "centos8")
        set(OPENCV_HASH "8ec3e3552500dee334162386b98cc54a5608de1f1a18f283523fc0cc13ee2f83")
        set(OPENCV_HASH "db087dfd412eedb8161636ec083ada85ff278109948d1d62a06b0f52e1f04202")
    elseif (LINUX_OS_NAME STREQUAL "Ubuntu 16.04" AND X86_64)
        set(OPENCV_SUFFIX "ubuntu16")
        set(OPENCV_HASH "cd46831b4d8d1c0891d8d22ff5b2670d0a465a8a8285243059659a50ceeae2c3")
    elseif (LINUX_OS_NAME STREQUAL "Ubuntu 18.04" AND X86_64)
        set(OPENCV_SUFFIX "ubuntu18")
        set(OPENCV_HASH "8ec3e3552500dee334162386b98cc54a5608de1f1a18f283523fc0cc13ee2f83")
        set(OPENCV_HASH "db087dfd412eedb8161636ec083ada85ff278109948d1d62a06b0f52e1f04202")
    elseif ((LINUX_OS_NAME STREQUAL "Ubuntu 20.04" OR LINUX_OS_NAME STREQUAL "LinuxMint 20.1") AND X86_64)
        set(OPENCV_SUFFIX "ubuntu20")
        set(OPENCV_HASH "2b7808d002864acdc5fc0b19cd30dadc31a37cc267931cad605f23f2383bfc21")
        set(OPENCV_HASH "2fe7bbc40e1186eb8d099822038cae2821abf617ac7a16fadf98f377c723e268")
    elseif(NOT DEFINED OpenCV_DIR AND NOT DEFINED ENV{OpenCV_DIR})
        message(FATAL_ERROR "OpenCV is not available on current platform (${LINUX_OS_NAME})")
    endif()

@@ -6,14 +6,14 @@ include_guard(GLOBAL)

set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x)
set(VPU_SUPPORTED_FIRMWARES_HASH
    "87389cef2aff63197f7787fb9b0ef7bfc74119200ef6b9f0c2c763b3ea4aabe9"
    "eba4fabfd71f9c81db12886b05f559f1c6092f9b65dfb4493c205f493d816fab")
    "cfba5fc0895a564fa51a1438f1c4d4f06198be982b1c2fb973c5cb9ab0a3c1f3"
    "4176456c96b151470de3a723b603503306cff2e52975b739927e37d730c053be")

#
# Default packages
#

set(FIRMWARE_PACKAGE_VERSION 1633)
set(FIRMWARE_PACKAGE_VERSION 1639)
set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2")

#

@@ -31,8 +31,13 @@ int image_read(const char *img_path, c_mat_t *img) {

    img->mat_width = mat.size().width;
    img->mat_height = mat.size().height;
    img->mat_type = mat.type();
    img->mat_data_size = img->mat_channels * img->mat_width * img->mat_height;
    img->mat_data_size = mat.elemSize() * img->mat_width * img->mat_height;
    img->mat_data = (unsigned char *)malloc(sizeof(unsigned char) * img->mat_data_size);

    if (img->mat_data == NULL) {
        return -1;
    }

    for (int i = 0; i < img->mat_data_size; ++i) {
        img->mat_data[i] = mat.data[i];
    }

@@ -54,8 +59,13 @@ int image_resize(const c_mat_t *src_img, c_mat_t *dst_img, const int width, cons

    dst_img->mat_width = mat_dst.size().width;
    dst_img->mat_height = mat_dst.size().height;
    dst_img->mat_type = mat_dst.type();
    dst_img->mat_data_size = dst_img->mat_channels * dst_img->mat_width * dst_img->mat_height;
    dst_img->mat_data_size = mat_dst.elemSize() * dst_img->mat_width * dst_img->mat_height;
    dst_img->mat_data = (unsigned char *)malloc(sizeof(unsigned char) * dst_img->mat_data_size);

    if (dst_img->mat_data == NULL) {
        return -1;
    }

    for (int i = 0; i < dst_img->mat_data_size; ++i) {
        dst_img->mat_data[i] = mat_dst.data[i];
    }
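
An aside on why the `elemSize()` form above is the safer buffer-size computation (a standalone sketch, not part of the diff; the mat shapes are made up):

```cpp
#include <opencv2/core.hpp>
#include <iostream>

int main() {
    // elemSize() = channels * bytes-per-channel, so it stays correct when a
    // mat is not 8-bit; counting channels alone under-sizes the buffer.
    cv::Mat m8(4, 4, CV_8UC3);    // 3 channels * 1 byte  -> elemSize() == 3
    cv::Mat m32(4, 4, CV_32FC3);  // 3 channels * 4 bytes -> elemSize() == 12
    std::cout << m8.elemSize() << " " << m32.elemSize() << "\n";
    return 0;
}
```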

@@ -14,7 +14,7 @@ To properly demonstrate this API, it is required to run several networks in pipe

## Running

To run the sample, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
To run the sample, you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader).

> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
>
@@ -39,6 +39,9 @@ struct classify_res *output_blob_to_classify_res(ie_blob_t *blob, size_t *n) {

    *n = output_dim.dims[1];

    struct classify_res *cls = (struct classify_res *)malloc(sizeof(struct classify_res) * (*n));
    if (!cls) {
        return NULL;
    }

    ie_blob_buffer_t blob_cbuffer;
    status = ie_blob_get_cbuffer(blob, &blob_cbuffer);
Some files were not shown because too many files have changed in this diff.