Compare commits
10 Commits
2021.2
...
releases/v
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d56d921409 | ||
|
|
e17f43922e | ||
|
|
aa23fbb9d3 | ||
|
|
8690aac494 | ||
|
|
783af1e5ae | ||
|
|
8396e6473e | ||
|
|
1eeaad3c82 | ||
|
|
c4ac476d2a | ||
|
|
ed6e329774 | ||
|
|
fb1536f2be |
@@ -4,13 +4,13 @@ jobs:
|
||||
timeoutInMinutes: 90
|
||||
|
||||
pool:
|
||||
name: LIN_VMSS_VENV_F8S_WU2
|
||||
name: LIN_VMSS_VENV_F16S_WU2
|
||||
|
||||
variables:
|
||||
system.debug: true
|
||||
VSTS_HTTP_RETRY: 5
|
||||
VSTS_HTTP_TIMEOUT: 200
|
||||
WORKERS_NUMBER: 8
|
||||
WORKERS_NUMBER: 16
|
||||
BUILD_TYPE: Release
|
||||
REPO_DIR: $(Build.Repository.LocalPath)
|
||||
WORK_DIR: $(Pipeline.Workspace)/_w
|
||||
@@ -22,11 +22,10 @@ jobs:
|
||||
curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
|
||||
whoami
|
||||
uname -a
|
||||
which python3
|
||||
python3 --version
|
||||
which java
|
||||
java -version
|
||||
gcc --version
|
||||
echo Python3 info ; which python3 ; python3 --version
|
||||
echo Python info ; which python ; python --version
|
||||
echo Java info ; which java ; java -version
|
||||
echo gcc info ; which gcc ; gcc --version
|
||||
lsb_release
|
||||
env
|
||||
cat /proc/cpuinfo
|
||||
@@ -35,6 +34,7 @@ jobs:
|
||||
vmstat -s
|
||||
df
|
||||
lsblk -o NAME,HCTL,SIZE,MOUNTPOINT | grep -i "sd"
|
||||
free -h
|
||||
displayName: 'System info'
|
||||
|
||||
- script: |
|
||||
|
||||
@@ -1,95 +0,0 @@
|
||||
jobs:
|
||||
- job: nGraph_ONNX_Lin
|
||||
|
||||
# About 150% of total time
|
||||
timeoutInMinutes: 60
|
||||
|
||||
pool:
|
||||
name: LIN_VMSS_VENV_F8S_WU2
|
||||
|
||||
variables:
|
||||
system.debug: true
|
||||
VSTS_HTTP_RETRY: 5
|
||||
VSTS_HTTP_TIMEOUT: 200
|
||||
WORKERS_NUMBER: 8
|
||||
BUILD_TYPE: Release
|
||||
REPO_DIR: $(Build.Repository.LocalPath)
|
||||
WORK_DIR: $(Pipeline.Workspace)/_w
|
||||
BUILD_DIR: $(WORK_DIR)/build
|
||||
BIN_DIR: $(REPO_DIR)/bin/intel64/$(BUILD_TYPE)
|
||||
INSTALL_DIR: $(WORK_DIR)/install
|
||||
|
||||
steps:
|
||||
- checkout: self
|
||||
clean: true
|
||||
lfs: false
|
||||
submodules: recursive
|
||||
path: openvino
|
||||
|
||||
- script: |
|
||||
curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01"
|
||||
whoami
|
||||
uname -a
|
||||
which python3
|
||||
python3 --version
|
||||
gcc --version
|
||||
lsb_release
|
||||
env
|
||||
cat /proc/cpuinfo
|
||||
cat /proc/meminfo
|
||||
vmstat -s
|
||||
df
|
||||
displayName: 'System info'
|
||||
|
||||
- script: |
|
||||
rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
|
||||
displayName: 'Make dir'
|
||||
|
||||
- script: |
|
||||
sudo apt --assume-yes install libusb-1.0-0-dev
|
||||
python3 -m pip install -r ./inference-engine/ie_bridges/python/requirements.txt
|
||||
# For running Python API tests
|
||||
python3 -m pip install -r ./inference-engine/ie_bridges/python/src/requirements-dev.txt
|
||||
displayName: 'Install dependencies'
|
||||
enabled: false
|
||||
|
||||
- script: |
|
||||
wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip
|
||||
unzip ninja-linux.zip
|
||||
sudo cp -v ninja /usr/local/bin/
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'Install Ninja'
|
||||
enabled: false
|
||||
|
||||
- task: CMake@1
|
||||
inputs:
|
||||
# CMake must get Python 3.x version by default
|
||||
cmakeArgs: -GNinja -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_VPU=OFF -DENABLE_GNA=OFF -DENABLE_OPENCV=OFF -DENABLE_CPPLINT=OFF -DENABLE_TESTS=OFF -DENABLE_BEH_TESTS=OFF -DENABLE_FUNCTIONAL_TESTS=OFF -DENABLE_MKL_DNN=ON -DENABLE_CLDNN=OFF -DENABLE_PROFILING_ITT=OFF -DENABLE_SAMPLES=OFF -DENABLE_SPEECH_DEMO=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE=/usr/bin/python3.6 -DNGRAPH_ONNX_IMPORT_ENABLE=ON -DNGRAPH_INTERPRETER_ENABLE=ON -DNGRAPH_DEBUG_ENABLE=OFF -DNGRAPH_DYNAMIC_COMPONENTS_ENABLE=ON -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) $(REPO_DIR)
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
enabled: false
|
||||
|
||||
- script: ninja
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Build'
|
||||
enabled: false
|
||||
|
||||
- script: make install
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Install'
|
||||
enabled: false
|
||||
|
||||
- script: |
|
||||
ls -alR $(REPO_DIR)/bin/
|
||||
ls -alR $(INSTALL_DIR)
|
||||
displayName: 'List files'
|
||||
enabled: false
|
||||
|
||||
- script: docker build --tag=openvino-onnx-ci-image --file=$(REPO_DIR)/.ci/openvino-onnx/Dockerfile .
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Docker build'
|
||||
enabled: false
|
||||
|
||||
- script: docker run --name openvino-onnx-ci-container openvino-onnx-ci-image
|
||||
workingDirectory: $(BUILD_DIR)
|
||||
displayName: 'Docker run tests'
|
||||
enabled: false
|
||||
@@ -111,7 +111,7 @@ jobs:
|
||||
continueOnError: false
|
||||
|
||||
- script: |
|
||||
git clone https://github.com/openvinotoolkit/testdata.git
|
||||
git clone --single-branch --branch releases/2021/2 https://github.com/openvinotoolkit/testdata.git
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'Clone testdata'
|
||||
|
||||
|
||||
@@ -53,16 +53,11 @@ jobs:
|
||||
displayName: 'Install dependencies'
|
||||
|
||||
- script: |
|
||||
certutil -urlcache -split -f https://incredibuilddiag1wu2.blob.core.windows.net/incredibuild/IBSetupConsole_9_5_0.exe IBSetupConsole_9_5_0.exe
|
||||
call IBSetupConsole_9_5_0.exe /Install /Components=Agent,oneuse /Coordinator=11.1.0.4 /AGENT:OPENFIREWALL=ON /AGENT:AUTOSELECTPORTS=ON /ADDTOPATH=ON /AGENT:INSTALLADDINS=OFF
|
||||
certutil -urlcache -split -f https://incredibuilddiag1wu2.blob.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
|
||||
call install_ib_console.bat
|
||||
workingDirectory: $(WORK_DIR)
|
||||
displayName: 'Install IncrediBuild'
|
||||
|
||||
- script: |
|
||||
echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
|
||||
reg add HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Xoreax\IncrediBuild\Builder /f /v LastEnabled /d 0 && echo Start IncrediBuild_Agent && net start IncrediBuild_Agent
|
||||
displayName: 'Start IncrediBuild'
|
||||
|
||||
- script: |
|
||||
set PATH=$(WORK_DIR)\ninja-win;%PATH%
|
||||
call "$(MSVS_VARS_PATH)" && cmake -GNinja -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
|
||||
@@ -78,6 +73,7 @@ jobs:
|
||||
- script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
|
||||
displayName: Stop IncrediBuild
|
||||
continueOnError: true
|
||||
|
||||
- script: dir $(REPO_DIR)\bin\ /s
|
||||
displayName: 'List files'
|
||||
|
||||
@@ -149,7 +145,7 @@ jobs:
|
||||
# Add for gtest-parallel, it hangs now (CVS-33386)
|
||||
#python $(BUILD_DIR)\gtest-parallel\gtest-parallel $(BIN_DIR)\MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=*smoke* -- --gtest_print_time=1
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.0\opencv\bin;%PATH%
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.1\opencv\bin;%PATH%
|
||||
set DATA_PATH=$(BUILD_DIR)\testdata
|
||||
set MODELS_PATH=$(BUILD_DIR)\testdata
|
||||
$(BIN_DIR)\MklDnnFunctionalTests --gtest_filter=*smoke* --gtest_print_time=1 --gtest_output=xml:TEST-MklDnnFunctionalTests.xml
|
||||
@@ -157,7 +153,7 @@ jobs:
|
||||
continueOnError: false
|
||||
|
||||
- script: |
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.0\opencv\bin;%PATH%
|
||||
set PATH=$(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.1\opencv\bin;%PATH%
|
||||
set DATA_PATH=$(BUILD_DIR)\testdata
|
||||
set MODELS_PATH=$(BUILD_DIR)\testdata
|
||||
$(BIN_DIR)\InferenceEngineCAPITests --gtest_output=xml:TEST-InferenceEngineCAPITests.xml
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# [OpenVINO™ Toolkit](https://01.org/openvinotoolkit) - Deep Learning Deployment Toolkit repository
|
||||
[](https://github.com/openvinotoolkit/openvino/releases/tag/2021.2)
|
||||
[](https://github.com/openvinotoolkit/openvino/releases/tag/2021.1)
|
||||
[](LICENSE)
|
||||

|
||||
|
||||
|
||||
@@ -1,362 +1,200 @@
|
||||
# Custom Operations Guide {#openvino_docs_HOWTO_Custom_Layers_Guide}
|
||||
# Custom Layers Guide {#openvino_docs_HOWTO_Custom_Layers_Guide}
|
||||
|
||||
The Intel® Distribution of OpenVINO™ toolkit supports neural network models trained with multiple frameworks including
|
||||
TensorFlow*, Caffe*, MXNet*, Kaldi* and ONNX* file format. The list of supported operations (layers) is different for
|
||||
each of the supported frameworks. To see the operations supported by your framework, refer to
|
||||
[Supported Framework Layers](../MO_DG/prepare_model/Supported_Frameworks_Layers.md).
|
||||
The Intel® Distribution of OpenVINO™ toolkit supports neural network model layers in multiple frameworks including TensorFlow*, Caffe*, MXNet*, Kaldi* and ONNX*. The list of known layers is different for each of the supported frameworks. To see the layers supported by your framework, refer to [supported frameworks](../MO_DG/prepare_model/Supported_Frameworks_Layers.md).
|
||||
|
||||
Custom operations are operations that are not included in the list of known operations. If your model contains any
|
||||
operation that is not in the list of known operations, the Model Optimizer is not able to generate an Intermediate
|
||||
Representation (IR) for this model.
|
||||
Custom layers are layers that are not included in the list of known layers. If your topology contains any layers that are not in the list of known layers, the Model Optimizer classifies them as custom.
|
||||
|
||||
This guide illustrates the workflow for running inference on topologies featuring custom operations, allowing you to
|
||||
plug in your own implementation for existing or completely new operation.
|
||||
This guide illustrates the workflow for running inference on topologies featuring custom layers, allowing you to plug in your own implementation for existing or completely new layers.
|
||||
For a step-by-step example of creating and executing a custom layer, see the [Custom Layer Implementation Tutorials for Linux and Windows.](https://github.com/david-drew/OpenVINO-Custom-Layers/tree/master/2019.r2.0)
|
||||
|
||||
> **NOTE:** *Layer* — The legacy term for an *operation*, which came from the Caffe\* framework. Currently it is not used.
|
||||
> Refer to the [Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™](../MO_DG/IR_and_opsets.md)
|
||||
> for more information on the topic.
|
||||
## Terms used in this guide
|
||||
|
||||
## Terms Used in This Guide
|
||||
- *Layer* — The abstract concept of a math function that is selected for a specific purpose (relu, sigmoid, tanh, convolutional). This is one of a sequential series of building blocks within the neural network.
|
||||
- *Kernel* — The implementation of a layer function, in this case, the math programmed (in C++ and Python) to perform the layer operation for target hardware (CPU or GPU).
|
||||
- *Intermediate Representation (IR)* — Neural Network used only by the Inference Engine in OpenVINO abstracting the different frameworks and describing topology, layer parameters and weights.
|
||||
The original format will be a supported framework such as TensorFlow, Caffe, or MXNet.
|
||||
|
||||
- *Intermediate Representation (IR)* — Neural Network used only by the Inference Engine in OpenVINO abstracting the
|
||||
different frameworks and describing the model topology, operations parameters and weights.
|
||||
|
||||
- *Operation* — The abstract concept of a math function that is selected for a specific purpose. Operations supported by
|
||||
OpenVINO™ are listed in the supported operation set provided in the [Available Operations Sets](../ops/opset.md).
|
||||
Examples of the operations are: [ReLU](../ops/activation/ReLU_1.md), [Convolution](../ops/convolution/Convolution_1.md),
|
||||
[Add](../ops/arithmetic/Add_1.md), etc.
|
||||
|
||||
- *Kernel* — The implementation of an operation function in the OpenVINO™ plugin, in this case, the math programmed (in
|
||||
C++ and OpenCL) to perform the operation for a target hardware (CPU or GPU).
|
||||
|
||||
- *Inference Engine Extension* — Device-specific module implementing custom operations (a set of kernels).
|
||||
|
||||
## Custom Operation Support Overview
|
||||
|
||||
There are three steps to support inference of a model with custom operation(s):
|
||||
1. Add support for a custom operation in the [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) so
|
||||
the Model Optimizer can generate the IR with the operation.
|
||||
2. Create an operation set and implement a custom nGraph operation in it as described in the
|
||||
[Custom nGraph Operation](../IE_DG/Extensibility_DG/AddingNGraphOps.md).
|
||||
3. Implement a custom operation in one of the [Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md)
|
||||
plugins to support inference of this operation using a particular target hardware (CPU, GPU or VPU).
|
||||
|
||||
To see the operations that are supported by each device plugin for the Inference Engine, refer to the
|
||||
[Supported Devices](../IE_DG/supported_plugins/Supported_Devices.md).
|
||||
|
||||
> **NOTE:** If a device doesn't support a particular operation, an alternative to creating a new operation is to target
|
||||
> an additional device using the HETERO plugin. The [Heterogeneous Plugin](../IE_DG/supported_plugins/HETERO.md) may be
|
||||
> used to run an inference model on multiple devices allowing the unsupported operations on one device to "fallback" to
|
||||
> run on another device (e.g., CPU) that does support those operations.
|
||||
|
||||
### Custom Operation Support for the Model Optimizer
|
||||
|
||||
The Model Optimizer model conversion pipeline is described in detail in the "Model Conversion Pipeline" section on the
|
||||
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md).
|
||||
It is recommended to read that article first for a better understanding of the following material.
|
||||
|
||||
Model Optimizer provides an extension mechanism to support new operations and implement custom model transformations to
|
||||
generate optimized IR. This mechanism is described in the "Model Optimizer Extensions" section on the
|
||||
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md).
|
||||
|
||||
At a minimum, two types of Model Optimizer extensions should be implemented to support a custom operation:
|
||||
1. Operation class for a new operation. This class stores information about the operation, its attributes, shape
|
||||
inference function, attributes to be saved to an IR and some others internally used attributes. Refer to the
|
||||
"Model Optimizer Operation" section on the
|
||||
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for the
|
||||
detailed instruction on how to implement it.
|
||||
2. Operation attributes extractor. The extractor is responsible for parsing framework-specific representation of the
|
||||
operation and uses corresponding operation class to update graph node attributes with necessary attributes of the
|
||||
operation. Refer to the "Operation Extractor" section on the
|
||||
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for the
|
||||
detailed instruction on how to implement it.
|
||||
|
||||
> **NOTE:** In some cases you may need to implement some transformation to support the operation. This topic is covered
|
||||
> in the "Graph Transformation Extensions" section on the
|
||||
> [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md).
|
||||
|
||||
## Custom Operations Extensions for the Inference Engine
|
||||
|
||||
Inference Engine provides an extension mechanism to support new operations. This mechanism is described in the
|
||||
[Inference Engine Extensibility Mechanism](../IE_DG/Extensibility_DG/Intro.md).
|
||||
|
||||
Each device plugin includes a library of optimized implementations to execute known operations which must be extended to
|
||||
execute a custom operation. The custom operation extension is implemented according to the target device:
|
||||
|
||||
- Custom Operation CPU Extension
|
||||
- A compiled shared library (`.so`, `.dylib` or `.dll`) needed by the CPU Plugin for executing the custom operation
|
||||
on a CPU. Refer to the [How to Implement Custom CPU Operations](../IE_DG/Extensibility_DG/CPU_Kernel.md) for more
|
||||
details.
|
||||
- Custom Operation GPU Extension
|
||||
- OpenCL source code (.cl) for the custom operation kernel that will be compiled to execute on the GPU along with an
|
||||
operation description file (.xml) needed by the GPU Plugin for the custom operation kernel. Refer to the
|
||||
[How to Implement Custom GPU Operations](../IE_DG/Extensibility_DG/GPU_Kernel.md) for more details.
|
||||
- Custom Operation VPU Extension
|
||||
- OpenCL source code (.cl) for the custom operation kernel that will be compiled to execute on the VPU along with an
|
||||
operation description file (.xml) needed by the VPU Plugin for the custom operation kernel. Refer to the
|
||||
[How to Implement Custom Operations for VPU](../IE_DG/Extensibility_DG/VPU_Kernel.md) for more details.
|
||||
|
||||
Also, it is necessary to implement an nGraph custom operation according to the
|
||||
[Custom nGraph Operation](../IE_DG/Extensibility_DG/AddingNGraphOps.md) so the Inference Engine can read an IR with this
|
||||
operation and correctly infer output tensors shape and type.
|
||||
|
||||
## Enabling Magnetic Resonance Image Reconstruction Model
|
||||
This chapter provides a step-by-step instruction on how to enable the magnetic resonance image reconstruction model
|
||||
implemented in the [repository](https://github.com/rmsouza01/Hybrid-CS-Model-MRI/) using a custom operation on CPU. The
|
||||
example is prepared for a model generated from the repository with hash `2ede2f96161ce70dcdc922371fe6b6b254aafcc8`.
|
||||
|
||||
### Download and Convert the Model to a Frozen TensorFlow\* Model Format
|
||||
The original pre-trained model is provided in the hdf5 format which is not supported by OpenVINO directly and needs to
|
||||
be converted to TensorFlow\* frozen model format first.
|
||||
|
||||
1. Download repository `https://github.com/rmsouza01/Hybrid-CS-Model-MRI`:<br>
|
||||
```bash
|
||||
git clone https://github.com/rmsouza01/Hybrid-CS-Model-MRI
|
||||
git checkout 2ede2f96161ce70dcdc922371fe6b6b254aafcc8
|
||||
```
|
||||
|
||||
2. Convert pre-trained `.hdf5` to a frozen `.pb` graph using the following script (tested with TensorFlow==1.15.0 and
|
||||
Keras==2.2.4) which should be executed from the root of the cloned repository:<br>
|
||||
```py
|
||||
import keras as K
|
||||
import numpy as np
|
||||
import Modules.frequency_spatial_network as fsnet
|
||||
import tensorflow as tf
|
||||
|
||||
under_rate = '20'
|
||||
|
||||
stats = np.load("Data/stats_fs_unet_norm_" + under_rate + ".npy")
|
||||
var_sampling_mask = np.load("Data/sampling_mask_" + under_rate + "perc.npy")
|
||||
|
||||
model = fsnet.wnet(stats[0], stats[1], stats[2], stats[3], kshape = (5,5), kshape2=(3,3))
|
||||
model_name = "Models/wnet_" + under_rate + ".hdf5"
|
||||
model.load_weights(model_name)
|
||||
|
||||
inp = np.random.standard_normal([1, 256, 256, 2]).astype(np.float32)
|
||||
np.save('inp', inp)
|
||||
|
||||
sess = K.backend.get_session()
|
||||
sess.as_default()
|
||||
graph_def = sess.graph.as_graph_def()
|
||||
graph_def = tf.graph_util.convert_variables_to_constants(sess, graph_def, ['conv2d_44/BiasAdd'])
|
||||
with tf.gfile.FastGFile('wnet_20.pb', 'wb') as f:
|
||||
f.write(graph_def.SerializeToString())
|
||||
```
|
||||
- *Model Extension Generator* — Generates template source code files for each of the extensions needed by the Model Optimizer and the Inference Engine.
|
||||
|
||||
As a result the TensorFlow\* frozen model file "wnet_20.pb" is generated.
|
||||
- *Inference Engine Extension* — Device-specific module implementing custom layers (a set of kernels).
|
||||
|
||||
### Convert the Frozen TensorFlow\* Model to Intermediate Representation
|
||||
|
||||
First, open the model in TensorBoard or another TensorFlow* model visualization tool. The model supports dynamic
|
||||
batch dimension because the value for the batch dimension is not hardcoded in the model. Model Optimizer needs to set all
|
||||
dynamic dimensions to some specific value to create the IR, therefore specify the command line parameter `-b 1` to set
|
||||
the batch dimension equal to 1. The actual batch size dimension can be changed at runtime using the Inference Engine API
|
||||
described in the [Using Shape Inference](../IE_DG/ShapeInference.md). Also refer to
|
||||
[Converting a Model Using General Conversion Parameters](../MO_DG/prepare_model/convert_model/Converting_Model_General.md)
|
||||
and [Convert Your TensorFlow* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md)
|
||||
for more details and command line parameters used for the model conversion.
|
||||
## Custom Layer Overview
|
||||
|
||||
The [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) searches the list of known layers for each layer contained in the input model topology before building the model's internal representation, optimizing the model, and producing the Intermediate Representation files.
|
||||
|
||||
The [Inference Engine](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) loads the layers from the input model IR files into the specified device plugin, which will search a list of known layer implementations for the device. If your topology contains layers that are not in the list of known layers for the device, the Inference Engine considers the layer to be unsupported and reports an error. To see the layers that are supported by each device plugin for the Inference Engine, refer to the [Supported Devices](../IE_DG/supported_plugins/Supported_Devices.md) documentation.
|
||||
<br>
|
||||
> **NOTE:** If a device doesn't support a particular layer, an alternative to creating a new custom layer is to target an additional device using the HETERO plugin. The [Heterogeneous Plugin](../IE_DG/supported_plugins/HETERO.md) may be used to run an inference model on multiple devices allowing the unsupported layers on one device to "fallback" to run on another device (e.g., CPU) that does support those layers.
|
||||
|
||||
## Custom Layer Implementation Workflow
|
||||
|
||||
When implementing a custom layer for your pre-trained model in the Intel® Distribution of OpenVINO™ toolkit, you will need to add extensions to both the Model Optimizer and the Inference Engine.
|
||||
|
||||
## Custom Layer Extensions for the Model Optimizer
|
||||
|
||||
The following figure shows the basic processing steps for the Model Optimizer highlighting the two necessary custom layer extensions, the Custom Layer Extractor and the Custom Layer Operation.
|
||||
|
||||

|
||||
|
||||
|
||||
The Model Optimizer first extracts information from the input model which includes the topology of the model layers along with parameters, input and output format, etc., for each layer. The model is then optimized from the various known characteristics of the layers, interconnects, and data flow which partly comes from the layer operation providing details including the shape of the output for each layer. Finally, the optimized model is output to the model IR files needed by the Inference Engine to run the model.
|
||||
|
||||
The Model Optimizer starts with a library of known extractors and operations for each [supported model framework](../MO_DG/prepare_model/Supported_Frameworks_Layers.md) which must be extended to use each unknown custom layer. The custom layer extensions needed by the Model Optimizer are:
|
||||
|
||||
- Custom Layer Extractor
|
||||
- Responsible for identifying the custom layer operation and extracting the parameters for each instance of the custom layer. The layer parameters are stored per instance and used by the layer operation before finally appearing in the output IR. Typically the input layer parameters are unchanged, which is the case covered by this tutorial.
|
||||
- Custom Layer Operation
|
||||
- Responsible for specifying the attributes that are supported by the custom layer and computing the output shape for each instance of the custom layer from its parameters. <br> The `--mo-op` command-line argument shown in the examples below generates a custom layer operation for the Model Optimizer.
|
||||
|
||||
## Custom Layer Extensions for the Inference Engine
|
||||
|
||||
The following figure shows the basic flow for the Inference Engine highlighting two custom layer extensions for the CPU and GPU Plugins, the Custom Layer CPU extension and the Custom Layer GPU Extension.
|
||||
|
||||

|
||||
|
||||
Each device plugin includes a library of optimized implementations to execute known layer operations which must be extended to execute a custom layer. The custom layer extension is implemented according to the target device:
|
||||
|
||||
- Custom Layer CPU Extension
|
||||
- A compiled shared library (.so or .dll binary) needed by the CPU Plugin for executing the custom layer on the CPU.
|
||||
- Custom Layer GPU Extension
|
||||
- OpenCL source code (.cl) for the custom layer kernel that will be compiled to execute on the GPU along with a layer description file (.xml) needed by the GPU Plugin for the custom layer kernel.
|
||||
|
||||
## Model Extension Generator
|
||||
|
||||
Using answers to interactive questions or a *.json* configuration file, the Model Extension Generator tool generates template source code files for each of the extensions needed by the Model Optimizer and the Inference Engine. To complete the implementation of each extension, the template functions may need to be edited to fill-in details specific to the custom layer or the actual custom layer functionality itself.
|
||||
|
||||
### Command-line
|
||||
|
||||
The Model Extension Generator is included in the Intel® Distribution of OpenVINO™ toolkit installation and is run using the command (here with the "--help" option):
|
||||
|
||||
```bash
|
||||
./<MO_INSTALL_DIR>/mo.py --input_model <PATH_TO_MODEL>/wnet_20.pb -b 1
|
||||
python3 /opt/intel/openvino/deployment_tools/tools/extension_generator/extgen.py new --help
|
||||
```
|
||||
|
||||
Model Optimizer produces the following error:
|
||||
```bash
|
||||
[ ERROR ] List of operations that cannot be converted to Inference Engine IR:
|
||||
[ ERROR ] Complex (1)
|
||||
[ ERROR ] lambda_2/Complex
|
||||
[ ERROR ] IFFT2D (1)
|
||||
[ ERROR ] lambda_2/IFFT2D
|
||||
[ ERROR ] ComplexAbs (1)
|
||||
[ ERROR ] lambda_2/Abs
|
||||
[ ERROR ] Part of the nodes was not converted to IR. Stopped.
|
||||
where the output will appear similar to:
|
||||
|
||||
```
|
||||
usage: You can use any combination of the following arguments:
|
||||
|
||||
Arguments to configure extension generation in the interactive mode:
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
--mo-caffe-ext generate a Model Optimizer Caffe* extractor
|
||||
--mo-mxnet-ext generate a Model Optimizer MXNet* extractor
|
||||
--mo-tf-ext generate a Model Optimizer TensorFlow* extractor
|
||||
--mo-op generate a Model Optimizer operation
|
||||
--ie-cpu-ext generate an Inference Engine CPU extension
|
||||
--ie-gpu-ext generate an Inference Engine GPU extension
|
||||
--output_dir OUTPUT_DIR
|
||||
set an output directory. If not specified, the current
|
||||
directory is used by default.
|
||||
```
|
||||
|
||||
The error means that the Model Optimizer doesn't know how to handle 3 types of TensorFlow\* operations: "Complex",
|
||||
"IFFT2D" and "ComplexAbs". In order to see more details about the conversion process run the model conversion with
|
||||
additional parameter `--log_level DEBUG`. It is worth mentioning the following lines from the detailed output:
|
||||
The available command-line arguments are used to specify which extension(s) to generate templates for the Model Optimizer or Inference Engine. The generated extension files for each argument will appear starting from the top of the output directory as follows:
|
||||
|
||||
```bash
|
||||
[ INFO ] Called "tf_native_tf_node_infer" for node "lambda_2/Complex"
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ tf:228 ] Added placeholder with name 'lambda_2/lambda_3/strided_slice_port_0_ie_placeholder'
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ tf:228 ] Added placeholder with name 'lambda_2/lambda_4/strided_slice_port_0_ie_placeholder'
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ tf:241 ] update_input_in_pbs: replace input 'lambda_2/lambda_3/strided_slice' with input 'lambda_2/lambda_3/strided_slice_port_0_ie_placeholder'
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ tf:249 ] Replacing input '0' of the node 'lambda_2/Complex' with placeholder 'lambda_2/lambda_3/strided_slice_port_0_ie_placeholder'
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ tf:241 ] update_input_in_pbs: replace input 'lambda_2/lambda_4/strided_slice' with input 'lambda_2/lambda_4/strided_slice_port_0_ie_placeholder'
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ tf:249 ] Replacing input '1' of the node 'lambda_2/Complex' with placeholder 'lambda_2/lambda_4/strided_slice_port_0_ie_placeholder'
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ tf:148 ] Inferred shape of the output tensor with index '0' of the node 'lambda_2/Complex': '[ 1 256 256]'
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ infer:145 ] Outputs:
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ infer:32 ] output[0]: shape = [ 1 256 256], value = <UNKNOWN>
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ infer:129 ] --------------------
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ infer:130 ] Partial infer for lambda_2/IFFT2D
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ infer:131 ] Op: IFFT2D
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ infer:132 ] Inputs:
|
||||
[ <TIMESTAMP> ] [ DEBUG ] [ infer:32 ] input[0]: shape = [ 1 256 256], value = <UNKNOWN>
|
||||
```
|
||||
Command-line Argument | Output Directory Location |
|
||||
--------------------- | ------------------------------ |
|
||||
`--mo-caffe-ext` | user_mo_extensions/front/caffe |
|
||||
`--mo-mxnet-ext` | user_mo_extensions/front/mxnet |
|
||||
`--mo-tf-ext` | user_mo_extensions/front/tf |
|
||||
`--mo-op` | user_mo_extensions/ops |
|
||||
`--ie-cpu-ext` | user_ie_extensions/cpu |
|
||||
`--ie-gpu-ext` | user_ie_extensions/gpu |
|
||||
|
||||
This is a part of the log of the partial inference phase of the model conversion. See the "Partial Inference" section on
|
||||
the [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for
|
||||
more information about this phase. Model Optimizer inferred output shape for the unknown operation of type "Complex"
|
||||
using a "fallback" to TensorFlow\*. However, it is not enough to generate the IR because Model Optimizer doesn't know
|
||||
which attributes of the operation should be saved to IR. So it is necessary to implement Model Optimizer extensions to
|
||||
support these operations.
|
||||
### Extension Workflow
|
||||
|
||||
Before going into the extension development it is necessary to understand what these unsupported operations do according
|
||||
to the TensorFlow\* framework specification.
|
||||
The workflow for each generated extension follows the same basic steps:
|
||||
|
||||
* "Complex" - returns a tensor of complex type constructed from two real input tensors specifying real and imaginary
|
||||
part of a complex number.
|
||||
* "IFFT2D" - returns a tensor with inverse 2-dimensional discrete Fourier transform over the inner-most 2 dimensions of
|
||||
an input.
|
||||
* "ComplexAbs" - returns a tensor with absolute values of input tensor with complex numbers.
|
||||

|
||||
|
||||
The part of the model with all three unsupported operations is depicted below:
|
||||
**Step 1: Generate:** Use the Model Extension Generator to generate the Custom Layer Template Files.
|
||||
|
||||

|
||||
**Step 2: Edit:** Edit the Custom Layer Template Files as necessary to create the specialized Custom Layer Extension Source Code.
|
||||
|
||||
This model uses complex numbers during the inference but Inference Engine does not support tensors of this data type. So
|
||||
it is necessary to find a way to avoid using tensors of such a type in the model. Fortunately, the complex tensor
|
||||
appears as a result of the "Complex" operation, is used as input to the "IFFT2D" operation, and is then passed to "ComplexAbs"
|
||||
which produces real value tensor as output. So there are just 3 operations consuming/producing complex tensors in the
|
||||
model.
|
||||
**Step 3: Specify:** Specify the custom layer extension locations to be used by the Model Optimizer or Inference Engine.
|
||||
|
||||
Let's design an OpenVINO operation "FFT" which gets a single real number tensor describing the complex number and
|
||||
produces a single real number tensor describing output complex tensor. This way the fact that the model uses complex
|
||||
numbers is hidden inside the "FFT" operation implementation. The operation gets a tensor of shape `[N, H, W, 2]` and
|
||||
produces the output tensor with the same shape, where the innermost dimension contains pairs of real numbers describing
|
||||
the complex number (its real and imaginary part). As we will see further this operation will allow us to support the
|
||||
model. The implementation of the Model Optimizer operation should be saved to `mo_extensions/ops/FFT.py` file:
|
||||
## Caffe\* Models with Custom Layers <a name="caffe-models-with-custom-layers"></a>
|
||||
|
||||
@snippet FFT.py fft:operation
|
||||
If your Caffe\* model has custom layers:
|
||||
|
||||
The attribute `inverse` is a flag specifying type of the FFT to apply: forward or inverse.
|
||||
**Register the custom layers as extensions to the Model Optimizer**. For instructions, see [Extending Model Optimizer with New Primitives](../MO_DG/prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md). When your custom layers are registered as extensions, the Model Optimizer generates a valid and optimized Intermediate Representation. You will need a bit of Python\* code that lets the Model Optimizer:
|
||||
|
||||
See the "Model Optimizer Operation" section on the
|
||||
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for the
|
||||
detailed instruction on how to implement the operation.
|
||||
- Generate a valid Intermediate Representation according to the rules you specified.
|
||||
- Be independent from the availability of Caffe on your computer.
|
||||
|
||||
If your model contains Custom Layers, it is important to understand the internal workflow of the Model Optimizer. Consider the following example.
|
||||
|
||||
Now it is necessary to implement extractor for the "IFFT2D" operation according to the
|
||||
"Operation Extractor" section on the
|
||||
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). The
|
||||
following snippet provides two extractors: one for "IFFT2D", another one for "FFT2D", however only one of them is used
|
||||
in this example. The implementation should be saved to the file `mo_extensions/front/tf/FFT_ext.py`.
|
||||
**Example**:
|
||||
|
||||
@snippet FFT_ext.py fft_ext:extractor
|
||||
The network has:
|
||||
|
||||
> **NOTE:** The graph is in inconsistent state after extracting node attributes because according to original operation
|
||||
> "IFFT2D" semantic it should have an input consuming a tensor of complex numbers, but the extractor instantiated an
|
||||
> operation "FFT" which expects a real tensor with specific layout. But the inconsistency will be resolved during
|
||||
> applying front phase transformations discussed below.
|
||||
* One input layer (#1)
|
||||
* One output Layer (#5)
|
||||
* Three internal layers (#2, 3, 4)
|
||||
|
||||
The output shape of the operation "AddV2" from the picture above is `[N, H, W, 2]`, where the innermost dimension
|
||||
contains pairs of real numbers describing the complex number (its real and imaginary part). The following "StridedSlice"
|
||||
operations split the input tensor into 2 parts to get a tensor of real and a tensor of imaginary parts which are then
|
||||
consumed with the "Complex" operation to produce a tensor of complex numbers. These "StridedSlice" and "Complex"
|
||||
operations can be removed so the "FFT" operation will get a real value tensor encoding complex numbers. To achieve this
|
||||
we implement the front phase transformation which searches for a pattern of two "StridedSlice" operations with specific
|
||||
attributes producing data to "Complex" operation and removes it from the graph. Refer to the
|
||||
"Pattern-Defined Front Phase Transformations" section on the
|
||||
[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for more
|
||||
information on how this type of transformation works. The code snippet should be saved to the file
|
||||
`mo_extensions/front/tf/Complex.py`.
|
||||
The custom and standard layer types are:
|
||||
|
||||
@snippet Complex.py complex:transformation
|
||||
* Layers #2 and #5 are implemented as Model Optimizer extensions.
|
||||
* Layers #1 and #4 are supported in Model Optimizer out-of-the box.
|
||||
* Layer #3 is neither in the list of supported layers nor in extensions, but is specified in CustomLayersMapping.xml.
|
||||
|
||||
> **NOTE:** The graph is in inconsistent state because the "ComplexAbs" operation consumes complex value tensor but
|
||||
> "FFT" produces real value tensor.
|
||||
> **NOTE**: If any of the layers are not in one of three categories described above, the Model Optimizer fails with an appropriate message and a link to the corresponding question in [Model Optimizer FAQ](../MO_DG/prepare_model/Model_Optimizer_FAQ.md).
|
||||
|
||||
Now let's implement a transformation which replaces a "ComplexAbs" operation with a sub-graph of primitive operations
|
||||
which calculate the result using the following formula: \f$module(z) = \sqrt{real(z) \cdot real(z) + imag(z) \cdot imag(z)}\f$.
|
||||
Original "IFFT2D" operation produces tensor of complex values, but the "FFT" operation produces a real value tensor with
|
||||
the same format and shape as the input for the operation. So the input shape for the "ComplexAbs" will be `[N, H, W, 2]`
|
||||
with the innermost dimension containing tuple with real and imaginary part of a complex number. In order to calculate
|
||||
absolute values for the complex tensor we do the following:
|
||||
1. Raise all elements in the power of 2.
|
||||
2. Calculate a reduced sum over the innermost dimension.
|
||||
3. Calculate a square root.
|
||||
The general process is as shown:
|
||||
|
||||
The implementation should be saved to the file `mo_extensions/front/tf/ComplexAbs.py` and is provided below:
|
||||

|
||||
<br>
|
||||
|
||||
@snippet ComplexAbs.py complex_abs:transformation
|
||||
**Step 1:** The example model is fed to the Model Optimizer that **loads the model** with the special parser built on top of the `caffe.proto` file. In case of failure, the Model Optimizer asks you to prepare the parser that can read the model. For more information, refer to the Model Optimizer, <a href="MO_FAQ.html#FAQ1">FAQ #1</a>.
|
||||
|
||||
Now it is possible to convert the model using the following command line:
|
||||
```bash
|
||||
./<MO_INSTALL_DIR>/mo.py --input_model <PATH_TO_MODEL>/wnet_20.pb -b 1 --extensions mo_extensions/
|
||||
```
|
||||
**Step 2:** The Model Optimizer **extracts the attributes of all layers** by going through the list of layers and attempting to find the appropriate extractor. In order of priority, the Model Optimizer checks if the layer is:
|
||||
|
||||
* A. Registered as a Model Optimizer extension
|
||||
* B. Registered as a standard Model Optimizer layer
|
||||
|
||||
When the Model Optimizer finds a satisfying condition from the list above, it extracts the attributes according to the following rules:
|
||||
|
||||
* For A. - takes only the parameters specified in the extension
|
||||
* For B. - takes only the parameters specified in the standard extractor
|
||||
<br>
|
||||
|
||||
The sub-graph corresponding to the originally non-supported one is depicted on the image below:
|
||||
**Step 3:** The Model Optimizer **calculates the output shape of all layers**. The logic is the same as it is for the priorities. **Important:** the Model Optimizer always takes the first available option.
|
||||
|
||||

|
||||
**Step 4:** The Model Optimizer **optimizes the original model and produces the two Intermediate Representation (IR) files in .xml and .bin**.
|
||||
<br>
|
||||
|
||||
> **NOTE:** Model Optimizer performed conversion of the model from NHWC to NCHW layout that is why the dimension with
|
||||
> the value 2 moved to another position.
|
||||
## TensorFlow\* Models with Custom Layers <a name="Tensorflow-models-with-custom-layers"></a>
|
||||
|
||||
### Inference Engine Extension Implementation
|
||||
Now it is necessary to implement the extension for the CPU plugin with operation "FFT" introduced previously. The code
|
||||
below is based on the template extension described on the
|
||||
[Inference Engine Extensibility Mechanism](../IE_DG/Extensibility_DG/Intro.md).
|
||||
You have two options for TensorFlow\* models with custom layers:
|
||||
<br>
|
||||
|
||||
#### CMake Build File
|
||||
The first step is to create a CMake configuration file which builds the extension. The content of the "CMakeLists.txt"
|
||||
file is the following:
|
||||
* **Register those layers as extensions to the Model Optimizer.** In this case, the Model Optimizer generates a valid and optimized Intermediate Representation.
|
||||
* **If you have sub-graphs that should not be expressed with the analogous sub-graph in the Intermediate Representation, but another sub-graph should appear in the model, the Model Optimizer provides such an option.** This feature is helpful for many TensorFlow models. To read more, see [Sub-graph Replacement in the Model Optimizer](../MO_DG/prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md).
|
||||
|
||||
## MXNet\* Models with Custom Layers <a name="mxnet-models-with-custom-layers"></a>
|
||||
|
||||
@snippet ie_cpu_extension/CMakeLists.txt fft_cmake_list:cmake
|
||||
There are two options to convert your MXNet* model that contains custom layers:
|
||||
|
||||
The CPU FFT kernel implementation uses OpenCV to perform the FFT that is why the extension library is linked with
|
||||
"opencv_core" which comes with the OpenVINO.
|
||||
1. Register the custom layers as extensions to the Model Optimizer. For instructions, see [Extending MXNet Model Optimizer with New Primitives](../MO_DG/prepare_model/customize_model_optimizer/Extending_MXNet_Model_Optimizer_with_New_Primitives.md). When your custom layers are registered as extensions, the Model Optimizer generates a valid and optimized Intermediate Representation. You can create Model Optimizer extensions for both MXNet layers with op `Custom` and layers which are not standard MXNet layers.
|
||||
|
||||
#### Custom nGraph Operation "FFT" Implementation
|
||||
The next step is to create the nGraph operation FFT. The header file "fft_op.hpp" has the following content:
|
||||
2. If you have sub-graphs that should not be expressed with the analogous sub-graph in the Intermediate Representation, but another sub-graph should appear in the model, the Model Optimizer provides such an option. In MXNet, this function is actively used for SSD models: it provides an opportunity to find the necessary subgraph sequences and replace them. To read more, see [Sub-graph Replacement in the Model Optimizer](../MO_DG/prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md).
|
||||
|
||||
@snippet fft_op.hpp fft_op:header
|
||||
## Kaldi\* Models with Custom Layers <a name="Kaldi-models-with-custom-layers"></a>
|
||||
For information on converting your Kaldi* model containing custom layers see [Converting a Kaldi Model in the Model Optimizer Developer Guide](../MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md).
|
||||
|
||||
The operation has just one boolean attribute `inverse`. The implementations of the necessary nGraph operation functions are
|
||||
in the "fft_op.cpp" file with the following content:
|
||||
## ONNX\* Models with Custom Layers <a name="ONNX-models-with-custom-layers"></a>
|
||||
For information on converting your ONNX* model containing custom layers see [Converting an ONNX Model in the Model Optimizer Developer Guide](../MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md).
|
||||
|
||||
@snippet fft_op.cpp fft_op:implementation
|
||||
|
||||
Refer to the [Custom nGraph Operation](../IE_DG/Extensibility_DG/AddingNGraphOps.md) for more details.
|
||||
|
||||
#### CPU FFT Kernel Implementation
|
||||
The operation implementation for CPU plugin uses OpenCV to perform the FFT. The header file "fft_kernel.hpp" has the
|
||||
following content:
|
||||
|
||||
@snippet fft_kernel.hpp fft_kernel:header
|
||||
|
||||
The "fft_kernel.cpp" file with the CPU kernel implementation has the following content:
|
||||
|
||||
@snippet fft_kernel.cpp fft_kernel:implementation
|
||||
|
||||
Refer to the [How to Implement Custom CPU Operations](../IE_DG/Extensibility_DG/CPU_Kernel.md) for more details.
|
||||
|
||||
### Building and Running the Custom Extension
|
||||
In order to build the extension run the following:<br>
|
||||
```bash
|
||||
mkdir build && cd build
|
||||
source /opt/intel/openvino/bin/setupvars.sh
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Release
|
||||
make --jobs=$(nproc)
|
||||
```
|
||||
|
||||
The result of this command is a compiled shared library (`.so`, `.dylib` or `.dll`). It should be loaded in the
|
||||
application using `Core` class instance method `AddExtension` like this
|
||||
`core.AddExtension(make_so_pointer<IExtension>(compiled_library_file_name), "CPU");`.
|
||||
|
||||
To test that the extension is implemented correctly we can run the [Benchmark App](../../inference-engine/tools/benchmark_tool/README.md)
|
||||
the following way:
|
||||
```bash
|
||||
python3 $INTEL_OPENVINO_DIR/deployment_tools/tools/benchmark_tool/benchmark_app.py \
|
||||
-m <PATH_TO_IR>/wnet_20.xml \
|
||||
-l <PATH_TO_BUILD_DIR>/libfft_cpu_extension.so \
|
||||
-d CPU
|
||||
```
|
||||
## Step-by-Step Custom Layers Tutorial
|
||||
For a step-by-step walk-through creating and executing a custom layer, see [Custom Layer Implementation Tutorial for Linux and Windows.](https://github.com/david-drew/OpenVINO-Custom-Layers/tree/master/2019.r2.0)
|
||||
|
||||
## Additional Resources
|
||||
|
||||
- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit)
|
||||
- OpenVINO™ toolkit online documentation: [https://docs.openvinotoolkit.org](https://docs.openvinotoolkit.org)
|
||||
- [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
|
||||
- [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md)
|
||||
- [Inference Engine Extensibility Mechanism](../IE_DG/Extensibility_DG/Intro.md)
|
||||
- [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md)
|
||||
- [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_intel_index)
|
||||
@@ -366,7 +204,9 @@ python3 $INTEL_OPENVINO_DIR/deployment_tools/tools/benchmark_tool/benchmark_app.
|
||||
## Converting Models:
|
||||
|
||||
- [Convert Your Caffe* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md)
|
||||
- [Convert Your Kaldi* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md)
|
||||
- [Convert Your TensorFlow* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md)
|
||||
- [Convert Your MXNet* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md)
|
||||
- [Convert Your ONNX* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md)
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
#
|
||||
# Copyright (C) 2018-2019 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# ===============================================================================
|
||||
# Generated file for building library with user generated CPU extensions
|
||||
#
|
||||
# Contains implementation of the basic layer methods
|
||||
#
|
||||
# Refer to the section "Adding Your Own Kernels to the Inference Engine" in
|
||||
# OpenVINO* documentation (either online or offline in
|
||||
# <INSTALL_DIR>/deployment_tools/documentation/docs/index.html and then navigate
|
||||
# to the corresponding section).
|
||||
# ===============================================================================
|
||||
|
||||
# [fft_cmake_list:cmake]
|
||||
# Directory holding the extension sources; cached as INTERNAL.
set(CPU_EXTENSIONS_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR} CACHE INTERNAL "")

# The extension sources are built as C++11.
set(CMAKE_CXX_STANDARD 11)

# Packages the extension depends on: nGraph (the ONNX importer component is
# optional), Inference Engine, and the OpenCV "core" module.
find_package(ngraph REQUIRED OPTIONAL_COMPONENTS onnx_importer)
find_package(InferenceEngine REQUIRED)
find_package(OpenCV REQUIRED COMPONENTS core)

# Name of the shared library target that is produced.
set(TARGET_NAME fft_cpu_extension)

# Every .cpp file in the extension directory is compiled into the library.
file(GLOB SRC ${CPU_EXTENSIONS_BASE_DIR}/*.cpp)

add_library(${TARGET_NAME} SHARED ${SRC})

target_compile_definitions(${TARGET_NAME}
    PRIVATE
        IMPLEMENT_INFERENCE_EXTENSION_API)

target_link_libraries(${TARGET_NAME}
    PRIVATE
        ${InferenceEngine_LIBRARIES}
        ${NGRAPH_LIBRARIES}
        opencv_core)
|
||||
# [fft_cmake_list:cmake]
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// source: https://github.com/openvinotoolkit/openvino/tree/master/docs/template_extension
|
||||
|
||||
//! [fft_extension:implementation]
|
||||
#include "extension.hpp"
|
||||
#include "fft_kernel.hpp"
|
||||
#include "fft_op.hpp"
|
||||
#include <ngraph/factory.hpp>
|
||||
#include <ngraph/opsets/opset.hpp>
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
using namespace FFTExtension;
|
||||
|
||||
// Points `versionInfo` at the version record of this extension.
// The record is a function-local static, so the pointer stays valid after the call returns.
void Extension::GetVersion(const InferenceEngine::Version *&versionInfo) const noexcept {
    static InferenceEngine::Version fftExtensionVersion = {
        // extension API version
        {1, 0},
        "1.0",
        // extension description message
        "The CPU plugin extension with FFT operation"
    };

    versionInfo = &fftExtensionVersion;
}
|
||||
|
||||
std::map<std::string, ngraph::OpSet> Extension::getOpSets() {
|
||||
std::map<std::string, ngraph::OpSet> opsets;
|
||||
ngraph::OpSet opset;
|
||||
opset.insert<FFTOp>();
|
||||
opsets["fft_extension"] = opset;
|
||||
return opsets;
|
||||
}
|
||||
|
||||
// Lists the implementation types available for `node`:
// {"CPU"} when the node is an FFTOp, an empty list for anything else.
std::vector<std::string> Extension::getImplTypes(const std::shared_ptr<ngraph::Node> &node) {
    const bool isFftNode = std::dynamic_pointer_cast<FFTOp>(node) != nullptr;
    if (!isFftNode) {
        return {};
    }
    return {"CPU"};
}
|
||||
|
||||
// Creates the kernel implementation for `node`.
// An FFTImpl is returned only for FFTOp nodes requested with implType "CPU";
// every other combination yields nullptr.
InferenceEngine::ILayerImpl::Ptr Extension::getImplementation(const std::shared_ptr<ngraph::Node> &node,
                                                              const std::string &implType) {
    const bool isFftNode = std::dynamic_pointer_cast<FFTOp>(node) != nullptr;
    if (!isFftNode || implType != "CPU") {
        return nullptr;
    }
    return std::make_shared<FFTImpl>(node);
}
|
||||
|
||||
// Exported factory that creates the FFT extension object.
//
// On success `ext` receives a newly allocated Extension and OK is returned.
// If construction throws a std::exception, GENERAL_ERROR is returned and, when
// `resp` is not null, a null-terminated description is written to `resp->msg`.
INFERENCE_EXTENSION_API(InferenceEngine::StatusCode) InferenceEngine::CreateExtension(InferenceEngine::IExtension *&ext,
                                                                                      InferenceEngine::ResponseDesc *resp) noexcept {
    try {
        ext = new Extension();
        return OK;
    } catch (const std::exception &ex) {
        if (resp) {
            const std::string err = std::string("Couldn't create extension: ") + ex.what();
            // std::string::copy() never writes a terminator, so bound the copy by the
            // buffer size and terminate explicitly right after the copied characters.
            const size_t copied = err.copy(resp->msg, sizeof(resp->msg) - 1);
            resp->msg[copied] = '\0';
        }
        return InferenceEngine::GENERAL_ERROR;
    }
}
|
||||
//! [fft_extension:implementation]
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// source: https://github.com/openvinotoolkit/openvino/tree/master/docs/template_extension
|
||||
|
||||
//! [fft_extension:header]
|
||||
#pragma once
|
||||
|
||||
#include <ie_iextension.h>
|
||||
#include <ie_api.h>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
namespace FFTExtension {
|
||||
|
||||
class Extension : public InferenceEngine::IExtension {
|
||||
public:
|
||||
Extension() = default;
|
||||
void GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept override;
|
||||
void Unload() noexcept override {}
|
||||
void Release() noexcept override { delete this; }
|
||||
|
||||
std::map<std::string, ngraph::OpSet> getOpSets() override;
|
||||
std::vector<std::string> getImplTypes(const std::shared_ptr<ngraph::Node>& node) override;
|
||||
InferenceEngine::ILayerImpl::Ptr getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) override;
|
||||
};
|
||||
|
||||
}
|
||||
//! [fft_extension:header]
|
||||
@@ -1,119 +0,0 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
//! [fft_kernel:implementation]
|
||||
#include "fft_kernel.hpp"
|
||||
#include "fft_op.hpp"
|
||||
#include <details/ie_exception.hpp>
|
||||
#include <ie_layouts.h>
|
||||
|
||||
#include <opencv2/opencv.hpp>
|
||||
|
||||
using namespace FFTExtension;
|
||||
|
||||
// Builds the CPU kernel for an FFTOp node.
//
// Throws an Inference Engine exception when the node is not an FFTOp, does not
// have exactly one input and one output, has dynamic shapes, or is not FP32 on
// both sides. Otherwise the input/output shapes and the `inverse` attribute are
// cached for later use.
FFTImpl::FFTImpl(const std::shared_ptr<ngraph::Node> &node) {
    const auto fftNode = std::dynamic_pointer_cast<FFTOp>(node);
    if (!fftNode) {
        THROW_IE_EXCEPTION << "Cannot create implementation for unknown operation!";
    }

    const bool singleInputAndOutput = fftNode->inputs().size() == 1 && fftNode->outputs().size() == 1;
    if (!singleInputAndOutput) {
        THROW_IE_EXCEPTION << "Cannot create implementation for operation with incorrect number of inputs or outputs!";
    }

    const bool hasDynamicShape = fftNode->get_input_partial_shape(0).is_dynamic() ||
                                 fftNode->get_output_partial_shape(0).is_dynamic();
    if (hasDynamicShape) {
        THROW_IE_EXCEPTION << "Cannot create implementation for op with dynamic shapes!";
    }

    const bool allFp32 = fftNode->get_input_element_type(0) == ngraph::element::f32 &&
                         fftNode->get_output_element_type(0) == ngraph::element::f32;
    if (!allFp32) {
        THROW_IE_EXCEPTION << "Operation supports only FP32 tensors.";
    }

    inpShape = fftNode->get_input_shape(0);
    outShape = fftNode->get_output_shape(0);
    inverse = fftNode->inverse;
}
|
||||
|
||||
// Appends the single layer configuration supported by this kernel to `conf`:
// one FP32 input and one FP32 output, both described with the cached shapes,
// an identity dimension order and an unconstrained offset. `resp` is not used.
InferenceEngine::StatusCode FFTImpl::getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig> &conf,
                                                                InferenceEngine::ResponseDesc *resp) noexcept {
    // Identity order: 0, 1, ..., rank - 1.
    InferenceEngine::SizeVector dimOrder(inpShape.size());
    std::iota(dimOrder.begin(), dimOrder.end(), 0);

    // Allow any offset before data
    const size_t anyOffset = (std::numeric_limits<size_t>::max)();

    // Input tensor description
    InferenceEngine::DataConfig inputCfg;
    inputCfg.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, inpShape,
                                                {inpShape, dimOrder, anyOffset});

    // Output tensor description
    InferenceEngine::DataConfig outputCfg;
    outputCfg.desc = InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, outShape,
                                                 {outShape, dimOrder, anyOffset});

    InferenceEngine::LayerConfig layerCfg;
    layerCfg.inConfs.push_back(inputCfg);
    layerCfg.outConfs.push_back(outputCfg);
    conf.push_back(layerCfg);

    return InferenceEngine::StatusCode::OK;
}
|
||||
|
||||
// Validates the layer configuration selected by the plugin.
//
// Returns OK when `config` has exactly one input and one output, both FP32.
// Otherwise returns GENERAL_ERROR; the failure text is stored in `error` and,
// when `resp` is not null, copied (null-terminated) into `resp->msg`.
InferenceEngine::StatusCode FFTImpl::init(InferenceEngine::LayerConfig &config, InferenceEngine::ResponseDesc *resp) noexcept {
    try {
        if (config.inConfs.size() != 1 || config.outConfs.size() != 1) {
            THROW_IE_EXCEPTION << "Operation cannot be initialized with incorrect number of inputs/outputs!";
        }

        if (config.outConfs[0].desc.getPrecision() != InferenceEngine::Precision::FP32 ||
            config.inConfs[0].desc.getPrecision() != InferenceEngine::Precision::FP32) {
            THROW_IE_EXCEPTION << "Operation supports only FP32 precisions!";
        }
    } catch (const InferenceEngine::details::InferenceEngineException& ex) {
        // Keep the actual failure reason; previously `error` was never assigned,
        // so an empty message was reported to the caller.
        error = ex.what();
        if (resp) {
            strncpy(resp->msg, error.c_str(), sizeof(resp->msg) - 1);
            resp->msg[sizeof(resp->msg) - 1] = 0;
        }
        return InferenceEngine::GENERAL_ERROR;
    }
    return InferenceEngine::OK;
}
|
||||
|
||||
// Wraps the memory of an FP32 Inference Engine blob in a cv::Mat header.
// The Mat gets the blob's dimensions in the same order as the tensor
// descriptor reports them and refers to the blob's buffer (no data is copied).
// A non-FP32 blob trips CV_Assert.
static cv::Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
{
    const auto& tensorDesc = blob->getTensorDesc();

    const std::vector<size_t> blobDims = tensorDesc.getDims();
    // OpenCV expects the sizes as int.
    const std::vector<int> matSizes(blobDims.begin(), blobDims.end());

    CV_Assert(tensorDesc.getPrecision() == InferenceEngine::Precision::FP32);

    void* const data = static_cast<void*>(blob->buffer());
    return cv::Mat(matSizes, CV_32F, data);
}
|
||||
|
||||
// Runs the forward or inverse 2D DFT on every batch item.
//
// The blobs are indexed as [batch, plane, height, width]: plane 0 holds the
// real parts and plane 1 the imaginary parts. For each batch item the two input
// planes are merged into an interleaved complex matrix, transformed with
// cv::dft (or cv::idft with DFT_SCALE when `inverse` is set), and the result is
// split back into the two output planes.
//
// Returns OK on success. Because the function is noexcept, any exception raised
// by the checks or by OpenCV is caught here: GENERAL_ERROR is returned and, when
// `resp` is not null, the message is copied into `resp->msg`.
InferenceEngine::StatusCode FFTImpl::execute(std::vector<InferenceEngine::Blob::Ptr> &inputs,
                                             std::vector<InferenceEngine::Blob::Ptr> &outputs,
                                             InferenceEngine::ResponseDesc *resp) noexcept {
    try {
        if (inputs.empty() || outputs.empty() || !inputs[0] || !outputs[0]) {
            THROW_IE_EXCEPTION << "FFT operation expects one input and one output blob!";
        }

        cv::Mat inp = infEngineBlobToMat(inputs[0]);
        cv::Mat out = infEngineBlobToMat(outputs[0]);

        // size[2] / size[3] and ptr(i, 1) are read below, so both blobs must be
        // 4D, have at least two planes, and agree in shape.
        if (inp.dims != 4 || out.dims != 4 || inp.size[1] < 2 || inp.size != out.size) {
            THROW_IE_EXCEPTION << "FFT operation expects 4D input and output blobs of the same shape with real and imaginary planes!";
        }

        const int n = inp.size[0];
        const int h = inp.size[2];
        const int w = inp.size[3];
        cv::Mat complex(h, w, CV_32FC2), interleavedOut(h, w, CV_32FC2);
        for (int i = 0; i < n; ++i) {
            // Views over the real and imaginary planes of the i-th input item.
            std::vector<cv::Mat> components = {
                cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 0)),
                cv::Mat(h, w, CV_32F, inp.ptr<float>(i, 1))
            };
            cv::merge(components, complex);

            if (!inverse)
                cv::dft(complex, interleavedOut);
            else
                cv::idft(complex, interleavedOut, cv::DFT_SCALE);

            // Views over the output planes; cv::split writes into them directly.
            components = {
                cv::Mat(h, w, CV_32F, out.ptr<float>(i, 0)),
                cv::Mat(h, w, CV_32F, out.ptr<float>(i, 1))
            };
            cv::split(interleavedOut, components);
        }
    } catch (const std::exception &ex) {
        if (resp) {
            strncpy(resp->msg, ex.what(), sizeof(resp->msg) - 1);
            resp->msg[sizeof(resp->msg) - 1] = 0;
        }
        return InferenceEngine::GENERAL_ERROR;
    }
    return InferenceEngine::OK;
}
|
||||
//! [fft_kernel:implementation]
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// source: https://github.com/openvinotoolkit/openvino/tree/master/docs/template_extension
|
||||
|
||||
//! [fft_kernel:header]
|
||||
#pragma once
|
||||
|
||||
#include <ie_iextension.h>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
|
||||
namespace FFTExtension {
|
||||
|
||||
class FFTImpl : public InferenceEngine::ILayerExecImpl {
|
||||
public:
|
||||
explicit FFTImpl(const std::shared_ptr<ngraph::Node>& node);
|
||||
InferenceEngine::StatusCode getSupportedConfigurations(std::vector<InferenceEngine::LayerConfig> &conf,
|
||||
InferenceEngine::ResponseDesc *resp) noexcept override;
|
||||
InferenceEngine::StatusCode init(InferenceEngine::LayerConfig &config,
|
||||
InferenceEngine::ResponseDesc *resp) noexcept override;
|
||||
InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr> &inputs,
|
||||
std::vector<InferenceEngine::Blob::Ptr> &outputs,
|
||||
InferenceEngine::ResponseDesc *resp) noexcept override;
|
||||
private:
|
||||
ngraph::Shape inpShape;
|
||||
ngraph::Shape outShape;
|
||||
bool inverse;
|
||||
std::string error;
|
||||
};
|
||||
|
||||
}
|
||||
//! [fft_kernel:header]
|
||||
@@ -1,34 +0,0 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
//! [fft_op:implementation]
|
||||
#include "fft_op.hpp"
|
||||
|
||||
using namespace FFTExtension;
|
||||
|
||||
constexpr ngraph::NodeTypeInfo FFTOp::type_info;
|
||||
|
||||
FFTOp::FFTOp(const ngraph::Output<ngraph::Node>& inp, bool _inverse) : Op({inp}) {
|
||||
constructor_validate_and_infer_types();
|
||||
inverse = _inverse;
|
||||
}
|
||||
|
||||
void FFTOp::validate_and_infer_types() {
|
||||
auto outShape = get_input_partial_shape(0);
|
||||
set_output_type(0, get_input_element_type(0), outShape);
|
||||
}
|
||||
|
||||
// Creates a copy of this operation attached to `new_args`, keeping the current
// `inverse` value. Exactly one argument is accepted; any other count raises
// ngraph::ngraph_error.
std::shared_ptr<ngraph::Node> FFTOp::clone_with_new_inputs(const ngraph::OutputVector &new_args) const {
    if (new_args.size() == 1) {
        return std::make_shared<FFTOp>(new_args.front(), inverse);
    }
    throw ngraph::ngraph_error("Incorrect number of new arguments");
}
|
||||
|
||||
// Presents the operation's only attribute, "inverse", to `visitor`.
// Always returns true.
bool FFTOp::visit_attributes(ngraph::AttributeVisitor &visitor) {
    visitor.on_attribute("inverse", inverse);
    return true;
}
|
||||
//! [fft_op:implementation]
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
//! [fft_op:header]
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/ngraph.hpp>
|
||||
|
||||
namespace FFTExtension {
|
||||
|
||||
class FFTOp : public ngraph::op::Op {
|
||||
public:
|
||||
static constexpr ngraph::NodeTypeInfo type_info{"FFT", 0};
|
||||
const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; }
|
||||
|
||||
FFTOp() = default;
|
||||
FFTOp(const ngraph::Output<ngraph::Node>& inp, bool inverse);
|
||||
void validate_and_infer_types() override;
|
||||
std::shared_ptr<ngraph::Node> clone_with_new_inputs(const ngraph::OutputVector& new_args) const override;
|
||||
bool visit_attributes(ngraph::AttributeVisitor& visitor) override;
|
||||
|
||||
bool inverse;
|
||||
};
|
||||
|
||||
}
|
||||
//! [fft_op:header]
|
||||
|
||||
3
docs/HOWTO/img/IE_extensions_flow.png
Normal file
3
docs/HOWTO/img/IE_extensions_flow.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c2f362a39ae6c2af080e4f055b6fdba4954f918f85731545d1df3d687d9213d5
|
||||
size 421056
|
||||
3
docs/HOWTO/img/MEG_generic_flow.png
Normal file
3
docs/HOWTO/img/MEG_generic_flow.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cb5c700d003936779455353bfa4ed9432410c0975c46e2dfd30c6a1abccd1727
|
||||
size 23320
|
||||
3
docs/HOWTO/img/MO_extensions_flow.png
Normal file
3
docs/HOWTO/img/MO_extensions_flow.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:99d6b5146be85fa408dc5432883c3e2745cffe890133854a97dcf22f5c5962d4
|
||||
size 47564
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f7c8ab4f15874d235968471bcf876c89c795d601e69891208107b8b72aa58eb1
|
||||
size 70014
|
||||
3
docs/HOWTO/img/mo_caffe_priorities.png
Normal file
3
docs/HOWTO/img/mo_caffe_priorities.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0a4de6e502cae7542f1f311bcdbea6bb145f960f0d27d86a03160d1a60133778
|
||||
size 301310
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3d5ccf51fe1babb93d96d042494695a6a6e055d1f8ebf7eef5083d54d8987a23
|
||||
size 58789
|
||||
@@ -1,57 +0,0 @@
|
||||
"""
|
||||
Copyright (C) 2018-2020 Intel Corporation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
#! [complex:transformation]
|
||||
import logging as log
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mo.front.common.replacement import FrontReplacementSubgraph
|
||||
from mo.graph.graph import Graph
|
||||
|
||||
|
||||
class Complex(FrontReplacementSubgraph):
|
||||
enabled = True
|
||||
|
||||
def pattern(self):
|
||||
return dict(
|
||||
nodes=[
|
||||
('strided_slice_real', dict(op='StridedSlice')),
|
||||
('strided_slice_imag', dict(op='StridedSlice')),
|
||||
('complex', dict(op='Complex')),
|
||||
],
|
||||
edges=[
|
||||
('strided_slice_real', 'complex', {'in': 0}),
|
||||
('strided_slice_imag', 'complex', {'in': 1}),
|
||||
])
|
||||
|
||||
@staticmethod
|
||||
def replace_sub_graph(graph: Graph, match: dict):
|
||||
strided_slice_real = match['strided_slice_real']
|
||||
strided_slice_imag = match['strided_slice_imag']
|
||||
complex_node = match['complex']
|
||||
|
||||
# make sure that both strided slice operations get the same data as input
|
||||
assert strided_slice_real.in_port(0).get_source() == strided_slice_imag.in_port(0).get_source()
|
||||
|
||||
# identify the output port of the operation producing data for strided slice nodes
|
||||
input_node_output_port = strided_slice_real.in_port(0).get_source()
|
||||
input_node_output_port.disconnect()
|
||||
|
||||
# change the connection so now all consumers of "complex_node" get data from input node of strided slice nodes
|
||||
complex_node.out_port(0).get_connection().set_source(input_node_output_port)
|
||||
#! [complex:transformation]
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
"""
|
||||
Copyright (C) 2018-2020 Intel Corporation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
#! [complex_abs:transformation]
|
||||
import numpy as np
|
||||
|
||||
from extensions.ops.elementwise import Pow
|
||||
from extensions.ops.ReduceOps import ReduceSum
|
||||
from mo.front.common.replacement import FrontReplacementOp
|
||||
from mo.graph.graph import Graph, Node
|
||||
from mo.ops.const import Const
|
||||
|
||||
|
||||
class ComplexAbs(FrontReplacementOp):
|
||||
op = "ComplexAbs"
|
||||
enabled = True
|
||||
|
||||
def replace_op(self, graph: Graph, node: Node):
|
||||
pow_2 = Const(graph, {'value': np.float32(2.0)}).create_node()
|
||||
reduce_axis = Const(graph, {'value': np.int32(-1)}).create_node()
|
||||
pow_0_5 = Const(graph, {'value': np.float32(0.5)}).create_node()
|
||||
|
||||
sq = Pow(graph, dict(name=node.in_node(0).name + '/sq', power=2.0)).create_node([node.in_node(0), pow_2])
|
||||
sum = ReduceSum(graph, dict(name=sq.name + '/sum')).create_node([sq, reduce_axis])
|
||||
sqrt = Pow(graph, dict(name=sum.name + '/sqrt', power=0.5)).create_node([sum, pow_0_5])
|
||||
return [sqrt.id]
|
||||
#! [complex_abs:transformation]
|
||||
@@ -1,47 +0,0 @@
|
||||
"""
|
||||
Copyright (C) 2018-2020 Intel Corporation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
# ! [fft_ext:extractor]
|
||||
from ...ops.FFT import FFT
|
||||
from mo.front.extractor import FrontExtractorOp
|
||||
from mo.utils.error import Error
|
||||
|
||||
|
||||
class FFT2DFrontExtractor(FrontExtractorOp):
|
||||
op = 'FFT2D'
|
||||
enabled = True
|
||||
|
||||
@classmethod
|
||||
def extract(cls, node):
|
||||
attrs = {
|
||||
'inverse': 0
|
||||
}
|
||||
FFT.update_node_stat(node, attrs)
|
||||
return cls.enabled
|
||||
|
||||
|
||||
class IFFT2DFrontExtractor(FrontExtractorOp):
|
||||
op = 'IFFT2D'
|
||||
enabled = True
|
||||
|
||||
@classmethod
|
||||
def extract(cls, node):
|
||||
attrs = {
|
||||
'inverse': 1
|
||||
}
|
||||
FFT.update_node_stat(node, attrs)
|
||||
return cls.enabled
|
||||
# ! [fft_ext:extractor]
|
||||
@@ -1,40 +0,0 @@
|
||||
"""
|
||||
Copyright (C) 2018-2020 Intel Corporation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
#! [fft:operation]
|
||||
from mo.front.common.partial_infer.elemental import copy_shape_infer
|
||||
from mo.graph.graph import Node, Graph
|
||||
from mo.ops.op import Op
|
||||
|
||||
|
||||
class FFT(Op):
|
||||
op = 'FFT'
|
||||
enabled = False
|
||||
|
||||
def __init__(self, graph: Graph, attrs: dict):
|
||||
super().__init__(graph, {
|
||||
'type': self.op,
|
||||
'op': self.op,
|
||||
'version': 'fft_extension',
|
||||
'inverse': None,
|
||||
'in_ports_count': 1,
|
||||
'out_ports_count': 1,
|
||||
'infer': copy_shape_infer
|
||||
}, attrs)
|
||||
|
||||
def backend_attrs(self):
|
||||
return ['inverse']
|
||||
#! [fft:operation]
|
||||
@@ -1,4 +1,4 @@
|
||||
# Custom nGraph Operation {#openvino_docs_IE_DG_Extensibility_DG_AddingNGraphOps}
|
||||
# Add Custom nGraph Operations {#openvino_docs_IE_DG_Extensibility_DG_AddingNGraphOps}
|
||||
|
||||
Inference Engine Extension API allows to register operation sets (opsets) with custom nGraph operations, it allows to support Networks with unknown operations.
|
||||
|
||||
@@ -71,9 +71,10 @@ nGraph provides opsets mechanism for operation versioning. Different opsets dist
|
||||
|
||||
When specifying opset names, follow the rules below:
|
||||
* Use unique opset names.
|
||||
* Do not use the following built-in opset names: `extension`, `experimental`, `opset1`, `opset2`, `opset3`, ... , `opsetN`.
|
||||
* Do not use the following built-in opset names: `extension`, `experimental`, `opset1`, `opset2`.
|
||||
* Make sure that the Model Optimizer and your extension use the same opset names.
|
||||
* IR v10 operations have the mandatory `version` attribute specifying the opset.
|
||||
* IR v10 layers have the mandatory `version` attribute specifying the opset.
|
||||
* `opset1` is the name of default operations set.
|
||||
Operations from the default opset cannot be redefined.
|
||||
|
||||
Use a custom opset to create a new operation or extend functionality of an existing operation from another opset.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# How to Implement Custom CPU Operations {#openvino_docs_IE_DG_Extensibility_DG_CPU_Kernel}
|
||||
# How to Implement Custom CPU Layers {#openvino_docs_IE_DG_Extensibility_DG_CPU_Kernel}
|
||||
|
||||
The primary vehicle for the performance of the CPU codepath in the Inference Engine is the Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN), and new CPU kernels extend the Inference Engine plugin for the Intel MKL-DNN. Implementing the InferenceEngine::ILayerExecImpl defines a general CPU-side extension. There are no Intel MKL-DNN specifics in the way you need to implement a kernel.
|
||||
|
||||
|
||||
@@ -24,11 +24,11 @@ The `ngraph::onnx_import::Node` class represents a node in ONNX model. It provid
|
||||
New operator registration must happen before the ONNX model is read, for example, if an ONNX model uses the 'CustomRelu' operator, `register_operator("CustomRelu", ...)` must be called before InferenceEngine::Core::ReadNetwork.
|
||||
Re-registering ONNX operators within the same process is supported. During registration of the existing operator, a warning is printed.
|
||||
|
||||
The example below demonstrates an exemplary model that requires previously created 'CustomRelu' operator:
|
||||
The example below demonstrates an examplary model that requires previously created 'CustomRelu' operator:
|
||||
@snippet onnx_custom_op/onnx_custom_op.cpp onnx_custom_op:model
|
||||
|
||||
|
||||
For a reference on how to create a graph with nGraph operations, visit [Custom nGraph Operation](AddingNGraphOps.md).
|
||||
For a reference on how to create a graph with nGraph operations, visit [nGraph tutorial](../nGraphTutorial.md).
|
||||
For a complete list of predefined nGraph operators, visit [available operations sets](../../ops/opset.md).
|
||||
|
||||
If operator is no longer needed, it can be unregistered by calling `unregister_operator`. The function takes three arguments `op_type`, `version`, and `domain`.
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
# How to Implement Custom GPU Operations {#openvino_docs_IE_DG_Extensibility_DG_GPU_Kernel}
|
||||
# How to Implement Custom GPU Layers {#openvino_docs_IE_DG_Extensibility_DG_GPU_Kernel}
|
||||
|
||||
The GPU codepath abstracts many details about OpenCL™. You need to provide the kernel code in OpenCL C and the configuration file that connects the kernel and its parameters to the parameters of the operation.
|
||||
The GPU codepath abstracts many details about OpenCL™. You need to provide the kernel code in OpenCL C and the configuration file that connects the kernel and its parameters to the parameters of the layer.
|
||||
|
||||
There are two options of using custom operation configuration file:
|
||||
There are two options of using custom layer configuration file:
|
||||
|
||||
* Include a section with your kernels into the global automatically-loaded `cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml` file, which is hosted in the `<INSTALL_DIR>/deployment_tools/inference_engine/bin/intel64/{Debug/Release}` folder
|
||||
* Call the `InferenceEngine::Core::SetConfig()` method from your application with the `InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE` key and the configuration file name as a value before loading the network that uses custom operations to the plugin:
|
||||
* Call the `InferenceEngine::Core::SetConfig()` method from your application with the `InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE` key and the configuration file name as a value before loading the network that uses custom layers to the plugin:
|
||||
|
||||
@snippet snippets/GPU_Kernel.cpp part0
|
||||
|
||||
All Inference Engine samples, except trivial `hello_classification`,
|
||||
feature a dedicated command-line option `-c` to load custom kernels. For example, to load custom operations for the classification sample, run the command below:
|
||||
feature a dedicated command-line option `-c` to load custom kernels. For example, to load custom layers for the classification sample, run the command below:
|
||||
```sh
|
||||
$ ./classification_sample -m <path_to_model>/bvlc_alexnet_fp16.xml -i ./validation_set/daily/227x227/apron.bmp -d GPU
|
||||
-c <absolute_path_to_config>/custom_layer_example.xml
|
||||
@@ -19,7 +19,7 @@ $ ./classification_sample -m <path_to_model>/bvlc_alexnet_fp16.xml -i ./validati
|
||||
## Configuration File Format <a name="config-file-format"></a>
|
||||
|
||||
The configuration file is expected to follow the `.xml` file structure
|
||||
with a node of the type `CustomLayer` for every custom operation you provide.
|
||||
with a node of the type `CustomLayer` for every custom layer you provide.
|
||||
|
||||
The definitions described in the sections below use the following notations:
|
||||
|
||||
@@ -32,13 +32,14 @@ Notation | Description
|
||||
|
||||
### CustomLayer Node and Sub-node Structure
|
||||
|
||||
`CustomLayer` node contains the entire configuration for a single custom operation.
|
||||
`CustomLayer` node contains the entire configuration for a single custom
|
||||
layer.
|
||||
|
||||
| Attribute Name |\# | Description |
|
||||
|-----|-----|-----|
|
||||
| `name` | (1) | The name of the operation type to be used. This name should be identical to the type used in the IR.|
|
||||
| `type` | (1) | Must be `SimpleGPU`. |
|
||||
| `version` | (1) | Must be `1`. |
|
||||
| `name` | (1) | The name of the layer type to be used. This name should be identical to the type used in the IR.|
|
||||
| `type` | (1) | Must be `SimpleGPU`. |
|
||||
| `version` | (1) | Must be `1`. |
|
||||
|
||||
**Sub-nodes**: `Kernel` (1), `Buffers` (1), `CompilerOptions` (0+),
|
||||
`WorkSizes` (0/1)
|
||||
@@ -68,9 +69,9 @@ the sources during compilation (JIT).
|
||||
| Attribute Name | \# | Description |
|
||||
|------|-------|------|
|
||||
| `name` | (1) | The name of the defined JIT. For static constants, this can include the value as well (taken as a string). |
|
||||
| `param` | (0/1) | This parameter value is used as the value of this JIT definition. |
|
||||
| `param` | (0/1) | This parameter value is used as the value of this JIT definition. |
|
||||
| `type` | (0/1) | The parameter type. Accepted values: `int`, `float`, and `int[]`, `float[]` for arrays. |
|
||||
| `default` | (0/1) | The default value to be used if the specified parameters is missing from the operation in the IR. |
|
||||
| `default` | (0/1) | The default value to be used if the specified parameters is missing from the layer in the IR. |
|
||||
|
||||
**Sub-nodes:** None
|
||||
|
||||
@@ -91,7 +92,7 @@ weights or biases).
|
||||
|
||||
| Attribute Name | \# | Description |
|
||||
|----|-----|------|
|
||||
| `name` | (1) | Name of a blob attached to a operation in the IR |
|
||||
| `name` | (1) | Name of a blob attached to a layer in the IR |
|
||||
| `arg-index` | (1) | 0-based index in the entry function arguments to be bound to |
|
||||
|
||||
**Sub-nodes**: None
|
||||
@@ -104,7 +105,7 @@ weights or biases).
|
||||
|------|-------|-------|
|
||||
| `arg-index` | (1) | 0-based index in the entry function arguments to be bound to. |
|
||||
| `type` | (1) | `input` or `output` |
|
||||
| `port-index` | (1) | 0-based index in the operation input/output ports in the IR |
|
||||
| `port-index` | (1) | 0-based index in the layer’s input/output ports in the IR |
|
||||
| `format` | (0/1) | Data layout declaration for the tensor. Accepted values: `BFYX`, `BYXF`, `YXFB`, `FYXB` (also in all lowercase). Default value: `BFYX` |
|
||||
|
||||
### CompilerOptions Node and Sub-node Structure
|
||||
@@ -177,7 +178,7 @@ For an example, see [Example Kernel](#example-kernel).
|
||||
| `<TENSOR>_PITCHES_SIZE`| The size of the `<TENSOR>_PITCHES` array |
|
||||
| `<TENSOR>_OFFSET`| The number of elements from the start of the tensor to the first valid element (bypassing the lower padding) |
|
||||
All `<TENSOR>` values are automatically defined for every tensor
|
||||
bound to this operation (`INPUT0`, `INPUT1`, `OUTPUT0`, and so on), as shown
|
||||
bound to this layer (`INPUT0`, `INPUT1`, `OUTPUT0`, and so on), as shown
|
||||
in the following example:
|
||||
|
||||
```sh
|
||||
|
||||
@@ -2,22 +2,19 @@
|
||||
|
||||
Inference Engine Extensibility API allows to add support of custom operations to the Inference Engine.
|
||||
Extension should contain operation sets with custom operations and execution kernels for custom operations.
|
||||
Physically, an extension library can be represented as a dynamic library exporting the single `CreateExtension` function
|
||||
that allows to create a new extension instance.
|
||||
Physically, an extension library can be represented as a dynamic library exporting the single `CreateExtension` function that allows to create a new extension instance.
|
||||
|
||||
Extensibility library can be loaded to the `InferenceEngine::Core` object using the
|
||||
`InferenceEngine::Core::AddExtension` method.
|
||||
Extensibility library can be loaded to the InferenceEngine::Core object using the InferenceEngine::Core::AddExtension method.
|
||||
|
||||
## Inference Engine Extension Library
|
||||
|
||||
Inference Engine Extension dynamic library contains several components:
|
||||
Inference Engine Extension dynamic library contains several main components:
|
||||
|
||||
* [Extension Library](Extension.md):
|
||||
* [Extension class](Extension.md):
|
||||
- Contains custom operation sets
|
||||
- Provides CPU implementations for custom operations
|
||||
* [Custom nGraph Operation](AddingNGraphOps.md):
|
||||
- Allows to use `InferenceEngine::Core::ReadNetwork` to read Intermediate Representation (IR) with unsupported
|
||||
operations
|
||||
* [Custom operations](Intro.md):
|
||||
- Allows to use InferenceEngine::Core::ReadNetwork to read Intermediate Representation (IR) with unsupported operations
|
||||
- Allows to create `ngraph::Function` with unsupported operations
|
||||
- Provides shape inference mechanism for custom operations
|
||||
|
||||
@@ -29,13 +26,13 @@ at `<dldt source tree>/docs/template_extension`.
|
||||
|
||||
The Inference Engine workflow involves the creation of custom kernels and either custom or existing operations.
|
||||
|
||||
An _Operation_ is a network building block implemented in the training framework, for example, `Convolution` in Caffe*.
|
||||
An _Operation_ is a Network building block implemented in the training framework, for example, `Convolution` in Caffe*.
|
||||
A _Kernel_ is defined as the corresponding implementation in the Inference Engine.
|
||||
|
||||
Refer to the [Model Optimizer Extensibility](../../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md)
|
||||
for details on how a mapping between framework operations and Inference Engine kernels is registered.
|
||||
Refer to the [Custom Layers in the Model Optimizer](../../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) section for details on how
|
||||
mapping between framework layers and Inference Engine kernels is registered.
|
||||
|
||||
In short, you can plug your own kernel implementations into the Inference Engine and map them to the operations in the original framework.
|
||||
In short, you can plug your own kernel implementations into the Inference Engine and map them to the layers in the original framework.
|
||||
|
||||
The following pages describe how to integrate custom _kernels_ into the Inference Engine:
|
||||
|
||||
|
||||
@@ -32,8 +32,7 @@ MYRIAD.1.4-ma2480
|
||||
FPGA.0
|
||||
FPGA.1
|
||||
CPU
|
||||
GPU.0
|
||||
GPU.1
|
||||
GPU
|
||||
...
|
||||
```
|
||||
|
||||
|
||||
@@ -35,15 +35,13 @@ Inference Engine sample applications include the following:
|
||||
- [Object Detection for SSD C Sample](../../inference-engine/ie_bridges/c/samples/object_detection_sample_ssd/README.md)
|
||||
- [Object Detection for SSD Python* Sample](../../inference-engine/ie_bridges/python/sample/object_detection_sample_ssd/README.md)
|
||||
|
||||
> **NOTE**: All samples support input paths containing only ASCII characters, except the Hello Classification Sample, that supports Unicode.
|
||||
|
||||
## Media Files Available for Samples
|
||||
|
||||
To run the sample applications, you can use images and videos from the media files collection available at https://github.com/intel-iot-devkit/sample-videos.
|
||||
|
||||
## Samples that Support Pre-Trained Models
|
||||
|
||||
To run the sample, you can use [public](@ref omz_models_public_index) or [Intel's](@ref omz_models_intel_index) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader_README).
|
||||
You can download the [pre-trained models](@ref omz_models_intel_index) using the OpenVINO [Model Downloader](@ref omz_tools_downloader_README) or from [https://download.01.org/opencv/](https://download.01.org/opencv/).
|
||||
|
||||
## Build the Sample Applications
|
||||
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f4f6e9d35869fa2c414e58914aaec1607eb7d4768b69c0cbcce5d5fa3ceddba3
|
||||
size 56444
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:28f4e7ee50785e9c571725942e67c899d08e87af3802f6bea4721c64bfdb2bac
|
||||
size 21722
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0923af3acfb69dd0b88a5edf097e60c2655828b643d8e328561b13b0196c0850
|
||||
size 47997
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9976341ca931f3ab4e4fbccea26844b738adb27b091149a4c6231eda841ab867
|
||||
size 144541
|
||||
@@ -1,275 +0,0 @@
|
||||
Introduction to OpenVINO state API {#openvino_docs_IE_DG_network_state_intro}
|
||||
==============================
|
||||
|
||||
This section describes how to work with stateful networks in OpenVINO toolkit, specifically:
|
||||
* How stateful networks are represented in IR and nGraph
|
||||
* How operations with state can be done
|
||||
|
||||
The section additionally provides small examples of stateful network and code to infer it.
|
||||
|
||||
## What is a stateful network
|
||||
|
||||
Several use cases require processing of data sequences. When length of a sequence is known and small enough,
|
||||
we can process it with RNN-like networks that contain a cycle inside. But in some cases, like online speech recognition or time series
|
||||
forecasting, length of data sequence is unknown. Then data can be divided in small portions and processed step-by-step. But dependency
|
||||
between data portions should be addressed. For that, networks save some data between inferences - state. When one dependent sequence is over,
|
||||
state should be reset to initial value and new sequence can be started.
|
||||
|
||||
Several frameworks have special API for states in networks. For example, Keras has a special option for RNNs, `stateful`, that turns on saving state
|
||||
between inferences. Kaldi contains special specifier `Offset` to define time offset in a network.
|
||||
|
||||
OpenVINO also contains special API to simplify work with networks with states. State is automatically saved between inferences,
|
||||
and there is a way to reset state when needed. You can also read state or set it to some new value between inferences.
|
||||
|
||||
## OpenVINO state representation
|
||||
|
||||
OpenVINO contains special abstraction variable to represent state in a network. There are two operations to work with state:
|
||||
* `Assign` to save value in state
|
||||
* `ReadValue` to read value saved on previous iteration
|
||||
|
||||
You can find more details on these operations in [ReadValue specification](../ops/infrastructure/ReadValue_3.md) and
|
||||
[Assign specification](../ops/infrastructure/Assign_3.md).
|
||||
|
||||
## Examples of representation of a network with states
|
||||
|
||||
To get a model with states ready for inference, you can convert a model from another framework to IR with Model Optimizer or create an nGraph function
|
||||
(details can be found in [Build nGraph Function section](../nGraph_DG/build_function.md)).
|
||||
Let's represent the following graph in both forms:
|
||||
![state_network_example]
|
||||
|
||||
### Example of IR with state
|
||||
|
||||
The `bin` file for this graph should contain float 0 in binary form. Content of `xml` is the following.
|
||||
|
||||
```xml
|
||||
<?xml version="1.0" ?>
|
||||
<net name="summator" version="10">
|
||||
<layers>
|
||||
<layer id="0" name="init_value" type="Const" version="opset5">
|
||||
<data element_type="f32" offset="0" shape="1,1" size="4"/>
|
||||
<output>
|
||||
<port id="1" precision="FP32">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer id="1" name="read" type="ReadValue" version="opset5">
|
||||
<data variable_id="id"/>
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="1" precision="FP32">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer id="2" name="input" type="Parameter" version="opset5">
|
||||
<data element_type="f32" shape="1,1"/>
|
||||
<output>
|
||||
<port id="0" precision="FP32">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer id="3" name="add_sum" type="Add" version="opset5">
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="1">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="2" precision="FP32">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer id="4" name="save" type="Assign" version="opset5">
|
||||
<data variable_id="id"/>
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
</layer>
|
||||
<layer id="10" name="add" type="Add" version="opset5">
|
||||
<data axis="1"/>
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
<port id="1">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="2" precision="FP32">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer id="5" name="output/sink_port_0" type="Result" version="opset5">
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>1</dim>
|
||||
<dim>1</dim>
|
||||
</port>
|
||||
</input>
|
||||
</layer>
|
||||
</layers>
|
||||
<edges>
|
||||
<edge from-layer="0" from-port="1" to-layer="1" to-port="0"/>
|
||||
<edge from-layer="2" from-port="0" to-layer="3" to-port="1"/>
|
||||
<edge from-layer="1" from-port="1" to-layer="3" to-port="0"/>
|
||||
<edge from-layer="3" from-port="2" to-layer="4" to-port="0"/>
|
||||
<edge from-layer="3" from-port="2" to-layer="10" to-port="0"/>
|
||||
<edge from-layer="1" from-port="1" to-layer="10" to-port="1"/>
|
||||
<edge from-layer="10" from-port="2" to-layer="5" to-port="0"/>
|
||||
</edges>
|
||||
<meta_data>
|
||||
<MO_version value="unknown version"/>
|
||||
<cli_parameters>
|
||||
</cli_parameters>
|
||||
</meta_data>
|
||||
</net>
|
||||
```
|
||||
|
||||
### Example of creating a model with nGraph API
|
||||
|
||||
```cpp
|
||||
auto arg = make_shared<op::Parameter>(element::f32, Shape{1, 1});
|
||||
auto init_const = op::Constant::create(element::f32, Shape{1, 1}, {0});
|
||||
auto read = make_shared<op::ReadValue>(init_const, "v0");
|
||||
std::vector<shared_ptr<Node>> args = {arg, read};
|
||||
auto add = make_shared<op::Add>(arg, read);
|
||||
auto assign = make_shared<op::Assign>(add, "v0");
|
||||
auto add2 = make_shared<op::Add>(add, read);
|
||||
auto res = make_shared<op::Result>(add2);
|
||||
|
||||
auto f = make_shared<Function>(ResultVector({res}), ParameterVector({arg}), SinkVector({assign}));
|
||||
```
|
||||
|
||||
In this example, `SinkVector` is used to create `ngraph::Function`. For network with states, except inputs and outputs, `Assign` nodes should also point to `Function`
|
||||
to avoid deleting it during graph transformations. You can do it with the constructor, as shown in the example, or with the special method `add_sinks(const SinkVector& sinks)`. Also you can delete
|
||||
sink from `ngraph::Function` after deleting the node from graph with the `delete_sink()` method.
|
||||
|
||||
## OpenVINO state API
|
||||
|
||||
Inference Engine has the `InferRequest::QueryState` method to get the list of states from a network and the `IVariableState` interface to operate with states.
|
||||
A brief description of the interface methods is given below, and the next section contains a small working example of how this interface can be used.
|
||||
|
||||
* `std::string GetName() const`
|
||||
returns the name (variable_id) of the corresponding Variable
|
||||
* `void Reset()`
|
||||
reset state to default value
|
||||
* `void SetState(Blob::Ptr newState)`
|
||||
set new value for state
|
||||
* `Blob::CPtr GetState() const`
|
||||
returns current value of state
|
||||
|
||||
## Example of stateful network inference
|
||||
|
||||
Let's take an IR from the previous section example. The example below demonstrates inference of two independent sequences of data. State should be reset between these sequences.
|
||||
|
||||
One infer request and one thread
|
||||
will be used in this example. Using several threads is possible if you have several independent sequences. Then each sequence can be processed in its own infer
|
||||
request. Inference of one sequence in several infer requests is not recommended. In one infer request state will be saved automatically between inferences, but
|
||||
if the first step is done in one infer request and the second in another, state should be set in new infer request manually (using `IVariableState::SetState` method).
|
||||
|
||||
@snippet openvino/docs/snippets/InferenceEngine_network_with_state_infer.cpp part1
|
||||
|
||||
You can find more powerful examples demonstrating how to work with networks with states in speech sample and demo.
|
||||
Descriptions can be found in [Samples Overview](./Samples_Overview.md)
|
||||
|
||||
[state_network_example]: ./img/state_network_example.png
|
||||
|
||||
|
||||
## LowLatency transformation
|
||||
|
||||
If the original framework does not have a special API for working with states, after importing the model, OpenVINO representation will not contain Assign/ReadValue layers. For example, if the original ONNX model contains RNN operations, IR will contain TensorIterator operations and the values will be obtained only after the execution of whole TensorIterator primitive, intermediate values from each iteration will not be available. To be able to work with these intermediate values of each iteration and receive them with a low latency after each infer request, a special LowLatency transformation was introduced.
|
||||
|
||||
LowLatency transformation changes the structure of the network containing [TensorIterator](../ops/infrastructure/TensorIterator_1.md) by adding the ability to work with state, inserting Assign/ReadValue layers as it is shown in the picture below.
|
||||
|
||||

|
||||
|
||||
### Steps to apply LowLatency transformation
|
||||
|
||||
1. Get CNNNetwork. Any way is acceptable:
|
||||
|
||||
* [from IR or ONNX model](Integrate_with_customer_application_new_API.md#integration-steps)
|
||||
* [from nGraph Function](../nGraph_DG/build_function.md)
|
||||
|
||||
2. [Reshape](ShapeInference) CNNNetwork network if necessary
|
||||
**Necessary case:** the sequence_lengths dimension of the input is > 1, which means the TensorIterator layer will have number_iterations > 1. We should reshape the inputs of the network to set sequence_dimension exactly to 1.
|
||||
```cpp
|
||||
|
||||
// Network before reshape: Parameter (name: X, shape: [2 (sequence_lengths), 1, 16]) -> TensorIterator (num_iteration = 2, axis = 0) -> ...
|
||||
|
||||
cnnNetwork.reshape({{"X", {1, 1, 16}}});
|
||||
|
||||
// Network after reshape: Parameter (name: X, shape: [1 (sequence_lengths), 1, 16]) -> TensorIterator (num_iteration = 1, axis = 0) -> ...
|
||||
|
||||
```
|
||||
|
||||
3. Apply LowLatency transformation
|
||||
```cpp
|
||||
#include "ie_transformations.hpp"
|
||||
|
||||
...
|
||||
|
||||
InferenceEngine::LowLatency(cnnNetwork);
|
||||
```
|
||||
**State naming rule:** a name of state is a concatenation of names: original TensorIterator operation, Parameter of the body, and additional suffix "variable_" + id (0-base indexing, new indexing for each TensorIterator), for example:
|
||||
```
|
||||
tensor_iterator_name = "TI_name"
|
||||
body_parameter_name = "param_name"
|
||||
|
||||
state_name = "TI_name/param_name/variable_0"
|
||||
```
|
||||
4. [Use state API](#openvino-state-api)
|
||||
|
||||
|
||||
### Known limitations
|
||||
1. Parameters are directly connected to States (ReadValues).
|
||||
|
||||
Removing Parameters from `ngraph::Function` is not possible.
|
||||
|
||||

|
||||
|
||||
**Current solution:** replace Parameter with Constant (freeze) with the value [0, 0, 0 … 0] via [ModelOptimizer CLI](../MO_DG/prepare_model/convert_model/Converting_Model_General.md) `--input` or `--freeze_placeholder_with_value`.
|
||||
|
||||
2. Non-reshapable network.
|
||||
|
||||
Value of shapes is hard-coded somewhere in the network.
|
||||
|
||||

|
||||
|
||||
**Current solution:** trim non-reshapable layers via [ModelOptimizer CLI](../MO_DG/prepare_model/convert_model/Converting_Model_General.md) `--input`, `--output` or via nGraph.
|
||||
|
||||
```cpp
|
||||
// nGraph example:
|
||||
auto func = cnnNetwork.getFunction();
|
||||
auto new_const = std::make_shared<ngraph::opset5::Constant>(); // type, shape, value
|
||||
for (const auto& node : func->get_ops()) {
|
||||
if (node->get_friendly_name() == "name_of_non_reshapable_const") {
|
||||
auto bad_const = std::dynamic_pointer_cast<ngraph::opset5::Constant>(node);
|
||||
ngraph::replace_node(bad_const, new_const); // replace constant
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -1,30 +1,9 @@
|
||||
GPU Plugin {#openvino_docs_IE_DG_supported_plugins_CL_DNN}
|
||||
=======
|
||||
|
||||
The GPU plugin uses the Intel® Compute Library for Deep Neural Networks (clDNN) to infer deep neural networks.
|
||||
clDNN is an open source performance library for Deep Learning (DL) applications intended for acceleration of Deep Learning Inference on Intel® Processor Graphics including Intel® HD Graphics, Intel® Iris® Graphics, Intel® Iris® Xe Graphics, and Intel® Iris® Xe MAX graphics.
|
||||
For an in-depth description of clDNN, see [Inference Engine source files](https://github.com/openvinotoolkit/openvino/tree/master/inference-engine/src/cldnn_engine) and [Accelerate Deep Learning Inference with Intel® Processor Graphics](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics).
|
||||
|
||||
## Device Naming Convention
|
||||
* Devices are enumerated as "GPU.X" where `X={0, 1, 2,...}`. Only Intel® GPU devices are considered.
|
||||
* If the system has an integrated GPU, it always has id=0 ("GPU.0").
|
||||
* Other GPUs have undefined order that depends on the GPU driver.
|
||||
* "GPU" is an alias for "GPU.0"
|
||||
* If the system doesn't have an integrated GPU, then devices are enumerated starting from 0.
|
||||
|
||||
For demonstration purposes, see the [Hello Query Device C++ Sample](../../../inference-engine/samples/hello_query_device/README.md) that can print out the list of available devices with associated indices. Below is an example output (truncated to the device names only):
|
||||
|
||||
```sh
|
||||
./hello_query_device
|
||||
Available devices:
|
||||
Device: CPU
|
||||
...
|
||||
Device: GPU.0
|
||||
...
|
||||
Device: GPU.1
|
||||
...
|
||||
Device: HDDL
|
||||
```
|
||||
The GPU plugin uses the Intel® Compute Library for Deep Neural Networks ([clDNN](https://01.org/cldnn)) to infer deep neural networks.
|
||||
clDNN is an open source performance library for Deep Learning (DL) applications intended for acceleration of Deep Learning Inference on Intel® Processor Graphics including Intel® HD Graphics and Intel® Iris® Graphics.
|
||||
For an in-depth description of clDNN, see: [clDNN sources](https://github.com/intel/clDNN) and [Accelerate Deep Learning Inference with Intel® Processor Graphics](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics).
|
||||
|
||||
## Optimizations
|
||||
|
||||
@@ -113,7 +92,7 @@ When specifying key values as raw strings (that is, when using Python API), omit
|
||||
| `KEY_CLDNN_PLUGIN_THROTTLE` | `<0-3>` | `0` | OpenCL queue throttling (before usage, make sure your OpenCL driver supports appropriate extension)<br> Lower value means lower driver thread priority and longer sleep time for it. 0 disables the setting. |
|
||||
| `KEY_CLDNN_GRAPH_DUMPS_DIR` | `"<dump_dir>"` | `""` | clDNN graph optimizer stages dump output directory (in GraphViz format) |
|
||||
| `KEY_CLDNN_SOURCES_DUMPS_DIR` | `"<dump_dir>"` | `""` | Final optimized clDNN OpenCL sources dump output directory |
|
||||
| `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).<br>This option can be used to decrease GPU stall time by providing more effective load from several streams. Increasing the number of streams usually is more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_CLDNN_PLUGIN_THROTTLE` option value (see above). If your target system has relatively weak CPU, keep throttling low. <br>The default value is 1, which implies latency-oriented behavior.<br>`KEY_GPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you are not sure how many resources your target machine has (and what would be the optimal number of streams). <br> A positive integer value creates the requested number of streams. |
|
||||
| `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).<br>This option can be used to decrease GPU stall time by providing more effective load from several streams. Increasing the number of streams usually is more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_CLDNN_PLUGIN_THROTTLE` option value (see above). If your target system has relatively weak CPU, keep throttling low. <br>The default value is 1, which implies latency-oriented behaviour.<br>`KEY_GPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you are not sure how many resources your target machine has (and what would be the optimal number of streams). <br> A positive integer value creates the requested number of streams. |
|
||||
| `KEY_EXCLUSIVE_ASYNC_REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.|
|
||||
|
||||
## Note on Debug Capabilities of the GPU Plugin
|
||||
|
||||
@@ -47,13 +47,11 @@ Inference Engine now features a dedicated API to enumerate devices and their cap
|
||||
```sh
|
||||
./hello_query_device
|
||||
Available devices:
|
||||
Device: CPU
|
||||
Device: CPU
|
||||
...
|
||||
Device: GPU.0
|
||||
Device: GPU
|
||||
...
|
||||
Device: GPU.1
|
||||
...
|
||||
Device: HDDL
|
||||
Device: HDDL
|
||||
```
|
||||
Simple programmatic way to enumerate the devices and use with the multi-device is as follows:
|
||||
|
||||
|
||||
@@ -71,8 +71,6 @@ In addition to common parameters, the MYRIAD plugin accepts the following option
|
||||
| `KEY_VPU_MYRIAD_FORCE_RESET` | `YES`/`NO` | `NO` | Enables force reset of all booted devices when new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. |
|
||||
| `KEY_VPU_PLATFORM` | empty string/`VPU_2450`/`VPU_2480` | empty string | **Deprecated** Use `KEY_VPU_MYRIAD_PLATFORM` instead. <br />If set, the plugin will use a device with specific platform to allocate a network. |
|
||||
| `KEY_VPU_FORCE_RESET` | `YES`/`NO` | `NO` | **Deprecated** Use `KEY_VPU_MYRIAD_FORCE_RESET` instead. <br />Enables force reset of all booted devices when new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. |
|
||||
| `KEY_VPU_MYRIAD_MOVIDIUS_DDR_TYPE` | `VPU_MYRIAD_DDR_AUTO`/ `VPU_MYRIAD_DDR_MICRON_2GB`/ `VPU_MYRIAD_DDR_SAMSUNG_2GB`/ `VPU_MYRIAD_DDR_HYNIX_2GB`/ `VPU_MYRIAD_DDR_MICRON_1GB` | `VPU_MYRIAD_DDR_AUTO` | This option allows setting DDR type for the MyriadX board. |
|
||||
|
||||
|
||||
## Device allocation <a name="MYRIAD_DEVICE_ALLOC"> </a>
|
||||
|
||||
|
||||
@@ -4,7 +4,9 @@ This software and the related documents are Intel copyrighted materials, and you
|
||||
|
||||
This document contains information on products, services and/or processes in development. All information provided here is subject to change without notice. Contact your Intel representative to obtain the latest forecast, schedule, specifications and roadmaps. The products and services described may contain defects or errors known as errata which may cause deviations from published specifications. Current characterized errata are available on request. Copies of documents which have an order number and are referenced in this document may be obtained by calling 1-800-548-4725 or by visiting [www.intel.com/design/literature.htm](https://www.intel.com/design/literature.htm).
|
||||
|
||||
Performance varies by use, configuration and other factors. Learn more at [www.intel.com/PerformanceIndex](https://www.intel.com/PerformanceIndex).
|
||||
Software and workloads used in performance tests may have been optimized for performance only on Intel microprocessors.
|
||||
|
||||
Performance tests, such as SYSmark and MobileMark, are measured using specific computer systems, components, software, operations and functions. Any change to any of those factors may cause the results to vary. You should consult other information and performance tests to assist you in fully evaluating your contemplated purchases, including the performance of that product when combined with other products. For more complete information visit [www.intel.com/benchmarks](https://www.intel.com/benchmarks).
|
||||
|
||||
Performance results are based on testing as of dates shown in configurations and may not reflect all publicly available updates. See backup for configuration details. No product or component can be absolutely secure.
|
||||
|
||||
@@ -12,7 +14,7 @@ Your costs and results may vary.
|
||||
|
||||
Intel technologies may require enabled hardware, software or service activation.
|
||||
|
||||
© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. \*Other names and brands may be claimed as the property of others.
|
||||
© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. \*Other names and brands may be claimed as the property of others.
|
||||
|
||||
## OpenVINO™ Logo
|
||||
To build equity around the project, the OpenVINO logo was created for both Intel and community usage. The logo may only be used to represent the OpenVINO toolkit and offerings built using the OpenVINO toolkit.
|
||||
|
||||
@@ -12,39 +12,51 @@ Model Optimizer produces an Intermediate Representation (IR) of the network, whi
|
||||
|
||||
* <code>.bin</code> - Contains the weights and biases binary data.
|
||||
|
||||
> **TIP**: You also can work with the Model Optimizer inside the OpenVINO™ [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench).
|
||||
> [DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare
|
||||
> performance of deep learning models on various Intel® architecture
|
||||
> configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components.
|
||||
> <br>
|
||||
> Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started.
|
||||
|
||||
## What's New in the Model Optimizer in this Release?
|
||||
|
||||
* Common changes:
|
||||
* Updated requirements for the numpy component to avoid compatibility issues with TensorFlow 1.x.
|
||||
* Improved reshape-ability of models with eltwise and CTCGreedyDecoder operations
|
||||
* Implemented several optimization transformations to replace sub-graphs of operations with HSwish, Mish, Swish and SoftPlus operations.
|
||||
* Model Optimizer generates IR keeping shape-calculating sub-graphs **by default**. Previously, this behavior was triggered if the "--keep_shape_ops" command line parameter was provided. The key is ignored in this release and will be deleted in the next release. To trigger the legacy behavior to generate an IR for a fixed input shape (folding ShapeOf operations and shape-calculating sub-graphs to Constant), use the "--static_shape" command line parameter. Changing model input shape using the Inference Engine API in runtime may fail for such an IR.
|
||||
* Fixed Model Optimizer conversion issues that resulted in non-reshapeable IR when using the Inference Engine reshape API.
|
||||
* Enabled transformations to fix non-reshapeable patterns in the original networks:
|
||||
* Hardcoded Reshape
|
||||
* In Reshape(2D)->MatMul pattern
|
||||
* Reshape->Transpose->Reshape when the pattern can be fused to the ShuffleChannels or DepthToSpace operation
|
||||
* Hardcoded Interpolate
|
||||
* In Interpolate->Concat pattern
|
||||
* Added a dedicated requirements file for TensorFlow 2.X as well as the dedicated install prerequisites scripts.
|
||||
* Replaced the SparseToDense operation with ScatterNDUpdate-4.
|
||||
* ONNX*:
|
||||
* Enabled an ability to specify the model output **tensor** name using the "--output" command line parameter.
|
||||
* Added support for the following operations:
|
||||
* Loop-11, 13
|
||||
* Round-11
|
||||
* GatherND-11, 12, 13
|
||||
* Acosh
|
||||
* Asinh
|
||||
* Atanh
|
||||
* DepthToSpace-11, 13
|
||||
* DequantizeLinear-10 (zero_point must be constant)
|
||||
* HardSigmoid-1,6
|
||||
* QuantizeLinear-10 (zero_point must be constant)
|
||||
* ReduceL1-11, 13
|
||||
* ReduceL2-11, 13
|
||||
* Resize-11, 13 (except mode="nearest" with 5D+ input, mode="tf_crop_and_resize", and attributes exclude_outside and extrapolation_value with non-zero values)
|
||||
* ScatterND-11, 13
|
||||
* SpaceToDepth-11, 13
|
||||
* TensorFlow*:
|
||||
* Added support for the TensorFlow Object Detection API models with pre-processing block when mean/scale values are applied prior to resizing of the image. Previously only the case when mean/scale values are applied after the resize was supported.
|
||||
* Aligned FakeQuantized limits adjustment with TensorFlow approach
|
||||
* Added support for the following operations:
|
||||
* GatherND
|
||||
* Round
|
||||
* NonMaxSuppression
|
||||
* LogSoftmax
|
||||
* FakeQuantWithMinMaxVarsPerChannel
|
||||
* Acosh
|
||||
* Asinh
|
||||
* Atanh
|
||||
* CTCLoss
|
||||
* EuclideanNorm
|
||||
* ExtractImagePatches
|
||||
* FloorDiv
|
||||
* MXNet*:
|
||||
* Added support for the following operations:
|
||||
* GatherND
|
||||
* Round
|
||||
* Acosh
|
||||
* Asinh
|
||||
* Atanh
|
||||
* Kaldi*:
|
||||
* Added support for the following operations:
|
||||
* TdnnComponent
|
||||
* Fixed bug with ParallelComponent support. Now it is fully supported with no restrictions.
|
||||
|
||||
> **NOTE:**
|
||||
> [Intel® System Studio](https://software.intel.com/en-us/system-studio) is an all-in-one, cross-platform tool suite, purpose-built to simplify system bring-up and improve system and IoT device application performance on Intel® platforms. If you are using the Intel® Distribution of OpenVINO™ with Intel® System Studio, go to [Get Started with Intel® System Studio](https://software.intel.com/en-us/articles/get-started-with-openvino-and-intel-system-studio-2019).
|
||||
@@ -65,6 +77,7 @@ Model Optimizer produces an Intermediate Representation (IR) of the network, whi
|
||||
* [Converting DeepSpeech from TensorFlow](prepare_model/convert_model/tf_specific/Convert_DeepSpeech_From_Tensorflow.md)
|
||||
* [Converting Language Model on One Billion Word Benchmark from TensorFlow](prepare_model/convert_model/tf_specific/Convert_lm_1b_From_Tensorflow.md)
|
||||
* [Converting Neural Collaborative Filtering Model from TensorFlow*](prepare_model/convert_model/tf_specific/Convert_NCF_From_Tensorflow.md)
|
||||
|
||||
* [Converting TensorFlow* Object Detection API Models](prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md)
|
||||
* [Converting TensorFlow*-Slim Image Classification Model Library Models](prepare_model/convert_model/tf_specific/Convert_Slim_Library_Models.md)
|
||||
* [Converting CRNN Model from TensorFlow*](prepare_model/convert_model/tf_specific/Convert_CRNN_From_Tensorflow.md)
|
||||
@@ -77,15 +90,17 @@ Model Optimizer produces an Intermediate Representation (IR) of the network, whi
|
||||
* [Model Optimizations Techniques](prepare_model/Model_Optimization_Techniques.md)
|
||||
* [Cutting parts of the model](prepare_model/convert_model/Cutting_Model.md)
|
||||
* [Sub-graph Replacement in Model Optimizer](prepare_model/customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md)
|
||||
* [(Deprecated) Case-Study: Converting SSD models created with the TensorFlow* Object Detection API](prepare_model/customize_model_optimizer/TensorFlow_SSD_ObjectDetection_API.md)
|
||||
* [(Deprecated) Case-Study: Converting Faster R-CNN models created with the TensorFlow* Object Detection API](prepare_model/customize_model_optimizer/TensorFlow_Faster_RCNN_ObjectDetection_API.md)
|
||||
* [Supported Framework Layers](prepare_model/Supported_Frameworks_Layers.md)
|
||||
* [Intermediate Representation and Operation Sets](IR_and_opsets.md)
|
||||
* [Operations Specification](../ops/opset.md)
|
||||
* [Intermediate Representation suitable for INT8 inference](prepare_model/convert_model/IR_suitable_for_INT8_inference.md)
|
||||
* [Model Optimizer Extensibility](prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md)
|
||||
|
||||
* [Custom Layers in Model Optimizer](prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md)
|
||||
* [Extending Model Optimizer with New Primitives](prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_New_Primitives.md)
|
||||
* [Extending Model Optimizer with Caffe Python Layers](prepare_model/customize_model_optimizer/Extending_Model_Optimizer_with_Caffe_Python_Layers.md)
|
||||
* [Extending Model Optimizer with Custom MXNet* Operations](prepare_model/customize_model_optimizer/Extending_MXNet_Model_Optimizer_with_New_Primitives.md)
|
||||
* [Legacy Mode for Caffe* Custom Layers](prepare_model/customize_model_optimizer/Legacy_Mode_for_Caffe_Custom_Layers.md)
|
||||
|
||||
* [Model Optimizer Frequently Asked Questions](prepare_model/Model_Optimizer_FAQ.md)
|
||||
|
||||
* [Known Issues](Known_Issues_Limitations.md)
|
||||
|
||||
@@ -38,5 +38,5 @@ Framework-specific parameters for:
|
||||
## See Also
|
||||
* [Configuring the Model Optimizer](../Config_Model_Optimizer.md)
|
||||
* [IR Notation Reference](../../IR_and_opsets.md)
|
||||
* [Model Optimizer Extensibility](../customize_model_optimizer/Customize_Model_Optimizer.md)
|
||||
* [Model Cutting](Cutting_Model.md)
|
||||
* [Custom Layers in Model Optimizer](../customize_model_optimizer/Customize_Model_Optimizer.md)
|
||||
* [Model Cutting](Cutting_Model.md)
|
||||
@@ -9,6 +9,7 @@ The following examples are the situations when model cutting is useful or even r
|
||||
* model has pre- or post-processing parts that cannot be translated to existing Inference Engine layers.
|
||||
* model has a training part that is convenient to be kept in the model, but not used during inference.
|
||||
* model is too complex (contains lots of unsupported operations that cannot be easily implemented as custom layers), so the complete model cannot be converted in one shot.
|
||||
* model is one of the supported [SSD models](../customize_model_optimizer/TensorFlow_SSD_ObjectDetection_API.md). In this case, you need to cut a post-processing part off.
|
||||
* problem with model conversion in the Model Optimizer or inference in the Inference Engine occurred. To localize the issue, limit the scope for conversion by iteratively searching for problematic places in the model.
|
||||
* single custom layer or a combination of custom layers is isolated for debugging purposes.
|
||||
|
||||
@@ -388,4 +389,4 @@ In this case, when `--input_shape` is specified and the node contains multiple i
|
||||
The correct command line is:
|
||||
```sh
|
||||
python3 mo.py --input_model=inception_v1.pb --input=0:InceptionV1/InceptionV1/Conv2d_1a_7x7/convolution --input_shape=[1,224,224,3]
|
||||
```
|
||||
```
|
||||
@@ -1582,9 +1582,9 @@ OI, which means that Input changes the fastest, then Output.
|
||||
|
||||
**Mathematical Formulation**
|
||||
|
||||
\f[
|
||||
output[:, ... ,:, i, ... , j,:, ... ,:] = input2[:, ... ,:, input1[i, ... ,j],:, ... ,:]
|
||||
\f]
|
||||
\f[
|
||||
output[:, ... ,:, i, ... , j,:, ... ,:] = input2[:, ... ,:, input1[i, ... ,j],:, ... ,:]
|
||||
\f]
|
||||
|
||||
|
||||
**Inputs**
|
||||
@@ -5086,9 +5086,7 @@ t \in \left ( 0, \quad tiles \right )
|
||||
|
||||
Output tensor is populated by values computed in the following way:
|
||||
|
||||
\f[
|
||||
output[i1, ..., i(axis-1), j, i(axis+1) ..., iN] = top_k(input[i1, ...., i(axis-1), :, i(axis+1), ..., iN]), k, sort, mode)
|
||||
\f]
|
||||
output[i1, ..., i(axis-1), j, i(axis+1) ..., iN] = top_k(input[i1, ...., i(axis-1), :, i(axis+1), ..., iN]), k, sort, mode)
|
||||
|
||||
So for each slice `input[i1, ...., i(axis-1), :, i(axis+1), ..., iN]` which represents 1D array, top_k value is computed individually. Sorting and minimum/maximum are controlled by `sort` and `mode` attributes.
|
||||
|
||||
|
||||
@@ -45,10 +45,6 @@ python3 convert_weights_pb.py --class_names coco.names --data_format NHWC --weig
|
||||
```sh
|
||||
python3 convert_weights_pb.py --class_names coco.names --data_format NHWC --weights_file yolov3-tiny.weights --tiny
|
||||
```
|
||||
At this step, you may receive a warning like `WARNING:tensorflow:Entity <...> could not be transformed and will be executed as-is.`. To work around this issue, switch to gast 0.2.2 with the following command:
|
||||
```sh
|
||||
pip3 install --user gast==0.2.2
|
||||
```
|
||||
|
||||
If you have YOLOv3 weights trained for an input image with the size different from 416 (320, 608 or your own), please provide the `--size` key with the size of your image specified while running the converter. For example, run the following command for an image with size 608:
|
||||
```sh
|
||||
@@ -110,7 +106,7 @@ where:
|
||||
|
||||
> **NOTE:** The color channel order (RGB or BGR) of an input data should match the channel order of the model training dataset. If they are different, perform the `RGB<->BGR` conversion specifying the command-line parameter: `--reverse_input_channels`. Otherwise, inference results may be incorrect. For more information about the parameter, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](../Converting_Model_General.md).
|
||||
|
||||
OpenVINO™ toolkit provides a demo that uses YOLOv3 model. For more information, refer to [Object Detection C++ Demo](@ref omz_demos_object_detection_demo_ssd_async_README).
|
||||
OpenVINO™ toolkit provides a demo that uses YOLOv3 model. For more information, refer to [Object Detection YOLO* V3 Demo, Async API Performance Showcase](@ref omz_demos_object_detection_demo_yolov3_async_README).
|
||||
|
||||
## Convert YOLOv1 and YOLOv2 Models to the IR
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,41 +1,45 @@
|
||||
# Extending Model Optimizer for Custom MXNet* Operations {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_MXNet_Model_Optimizer_with_New_Primitives}
|
||||
# Extending the MXNet Model Optimizer with New Primitives {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_MXNet_Model_Optimizer_with_New_Primitives}
|
||||
|
||||
This section provides instructions on how to support a custom MXNet operation (or as it is called in the MXNet documentation
|
||||
"operator" or "layer") which is not a part of the MXNet operation set. For example, if the operator is implemented using
|
||||
the following [guide](https://mxnet.apache.org/versions/1.7.0/api/faq/new_op.html).
|
||||
|
||||
This section describes a procedure on how to extract operator attributes in the Model Optimizer. The rest of the
|
||||
operation enabling pipeline and documentation on how to support MXNet operations from standard MXNet operation set is
|
||||
described in the main document [Customize_Model_Optimizer](Customize_Model_Optimizer.md).
|
||||
|
||||
## Writing Extractor for Custom MXNet Operation
|
||||
Custom MXNet operations have an attribute `op` (defining the type of the operation) equal to `Custom` and attribute
|
||||
`op_type` which is an operation type defined by a user. Implement extractor class inherited from the
|
||||
`MXNetCustomFrontExtractorOp` class instead of `FrontExtractorOp` class used for standard framework operations in order
|
||||
to extract attributes for such kind of operations. The `op` class attribute value should be set to the `op_type` value
|
||||
so the extractor is triggered for this kind of operation.
|
||||
|
||||
Below is an example of the extractor for the custom operation registered with type (`op_type` value) equal to
|
||||
`MyCustomOp` having attribute `my_attribute` of the floating point type with default value `5.6`. In this sample we
|
||||
assume that we have already created the `CustomOp` class (inherited from `Op` class) for the Model Optimizer operation
|
||||
for this MXNet custom operation as described in the [Customize_Model_Optimizer](Customize_Model_Optimizer.md).
|
||||
This section describes how you can create a Model Optimizer extension for a custom layer from your MXNet* model. It supplements the main document [Extending Model Optimizer with New Primitives](Extending_Model_Optimizer_with_New_Primitives.md) and provides a step-by-step procedure. To create an extension for a particular layer, perform the following steps:
|
||||
|
||||
1. Create the file `custom_proposal_ext.py` in the folder `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/mxnet`
|
||||
If your MXNet layer has op `Custom`, create the `CustomProposalFrontExtractor` class inherited from `MXNetCustomFrontExtractorOp`:
|
||||
```py
|
||||
from mo.front.extractor import MXNetCustomFrontExtractorOp
|
||||
class CustomProposalFrontExtractor(MXNetCustomFrontExtractorOp):
|
||||
pass
|
||||
```
|
||||
Otherwise, for layers that are not standard MXNet layers, create the `ProposalFrontExtractor` class inherited from `FrontExtractorOp`:
|
||||
```py
|
||||
from mo.front.extractor import FrontExtractorOp
|
||||
class ProposalFrontExtractor(FrontExtractorOp):
|
||||
pass
|
||||
```
|
||||
2. Specify the operation that the extractor refers to and a specific flag. The flag represents whether the operation should be used by the Model Optimizer or should be excluded from processing:
|
||||
```py
|
||||
from mo.front.extractor import MXNetCustomFrontExtractorOp
|
||||
class CustomProposalFrontExtractor(MXNetCustomFrontExtractorOp):
|
||||
op = '_contrib_Proposal'
|
||||
enabled = True
|
||||
```
|
||||
3. Register a mapping rule between the original model and the `PythonProposalOp` attributes by overriding the following function:
|
||||
```py
|
||||
from extension.ops.custom_op import CustomOp # implementation of the MO operation class
|
||||
from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
|
||||
from mo.front.extractor import MXNetCustomFrontExtractorOp
|
||||
from mo.ops.op import Op
|
||||
|
||||
class CustomProposalFrontExtractor(MXNetCustomFrontExtractorOp): # inherit from specific base class
|
||||
op = 'MyCustomOp' # the value corresponding to the `op_type` value of the MXNet operation
|
||||
enabled = True # the extractor is enabled
|
||||
|
||||
class CustomProposalFrontExtractor(MXNetCustomFrontExtractorOp):
|
||||
op = '_contrib_Proposal'
|
||||
enabled = True
|
||||
@staticmethod
|
||||
def extract(node):
|
||||
attrs = get_mxnet_layer_attrs(node.symbol_dict) # parse the attributes to a dictionary with string values
|
||||
attrs = get_mxnet_layer_attrs(node.symbol_dict)
|
||||
node_attrs = {
|
||||
'my_attribute': attrs.float('my_attribute', 5.6)
|
||||
'feat_stride': attrs.float('feat_stride', 16)
|
||||
}
|
||||
|
||||
CustomOp.update_node_stat(node, node_attrs) # update the attributes of the node
|
||||
return self.enabled
|
||||
|
||||
# update the attributes of the node
|
||||
Op.get_op_class_by_name('Proposal').update_node_stat(node, node_attrs) # <------ here goes the name ('Proposal') of the Operation that was implemented before
|
||||
return __class__.enabled
|
||||
```
|
||||
|
||||
|
||||
@@ -1,89 +0,0 @@
|
||||
# Extending Model Optimizer with Caffe* Python Layers {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_Model_Optimizer_With_Caffe_Python_Layers}
|
||||
|
||||
This section provides instruction on how to support a custom Caffe operation written only in Python. For example, the
|
||||
[Faster-R-CNN model](http://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0) implemented in
|
||||
Caffe contains a custom layer Proposal written in Python. The layer is described in the
|
||||
[Faster-R-CNN prototxt](https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/models/pascal_voc/VGG16/faster_rcnn_end2end/test.prototxt)
|
||||
the following way:
|
||||
```sh
|
||||
layer {
|
||||
name: 'proposal'
|
||||
type: 'Python'
|
||||
bottom: 'rpn_cls_prob_reshape'
|
||||
bottom: 'rpn_bbox_pred'
|
||||
bottom: 'im_info'
|
||||
top: 'rois'
|
||||
python_param {
|
||||
module: 'rpn.proposal_layer'
|
||||
layer: 'ProposalLayer'
|
||||
param_str: "'feat_stride': 16"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This section describes only a procedure on how to extract operator attributes in the Model Optimizer. The rest of the
|
||||
operation enabling pipeline and documentation on how to support other Caffe operations (written in C++) is described in
|
||||
the main document [Customize_Model_Optimizer](Customize_Model_Optimizer.md).
|
||||
|
||||
## Writing Extractor for Caffe Python Layer
|
||||
Custom Caffe Python layers have an attribute `type` (defining the type of the operation) equal to `Python` and two
|
||||
mandatory attributes `module` and `layer` in the `python_param` dictionary. The `module` defines the Python module name
|
||||
with the layer implementation, while the `layer` value is an operation type defined by a user. In order to extract
|
||||
attributes for such an operation it is necessary to implement extractor class inherited from the
|
||||
`CaffePythonFrontExtractorOp` class instead of `FrontExtractorOp` class used for standard framework layers. The `op`
|
||||
class attribute value should be set to the `module + "." + layer` value so the extractor is triggered for this kind of
|
||||
operation.
|
||||
|
||||
Here is a simplified example of the extractor for the custom operation Proposal from Faster-R-CNN model mentioned above.
|
||||
The full code with additional checks is provided in the
|
||||
`<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/caffe/proposal_python_ext.py`. The sample code uses
|
||||
operation `ProposalOp` which corresponds to `Proposal` operation described in the [Available Operations Sets](../../../ops/opset.md)
|
||||
document. Refer to the source code below for a detailed explanation of the extractor.
|
||||
|
||||
```py
|
||||
from extensions.ops.proposal import ProposalOp
|
||||
from mo.front.extractor import CaffePythonFrontExtractorOp
|
||||
|
||||
|
||||
class ProposalPythonFrontExtractor(CaffePythonFrontExtractorOp):
|
||||
op = 'rpn.proposal_layer.ProposalLayer' # module + "." + layer
|
||||
enabled = True # extractor is enabled
|
||||
|
||||
@staticmethod
|
||||
def extract_proposal_params(node, defaults):
|
||||
param = node.pb.python_param # get the protobuf message representation of the layer attributes
|
||||
# parse attributes from the layer protobuf message to a Python dictionary
|
||||
attrs = CaffePythonFrontExtractorOp.parse_param_str(param.param_str)
|
||||
update_attrs = defaults
|
||||
|
||||
# the operation expects ratio and scale values to be called "ratio" and "scale" while Caffe uses different names
|
||||
if 'ratios' in attrs:
|
||||
attrs['ratio'] = attrs['ratios']
|
||||
del attrs['ratios']
|
||||
if 'scales' in attrs:
|
||||
attrs['scale'] = attrs['scales']
|
||||
del attrs['scales']
|
||||
|
||||
update_attrs.update(attrs)
|
||||
ProposalOp.update_node_stat(node, update_attrs) # update the node attributes
|
||||
|
||||
@classmethod
|
||||
def extract(cls, node):
|
||||
# define default values for the Proposal layer attributes
|
||||
defaults = {
|
||||
'feat_stride': 16,
|
||||
'base_size': 16,
|
||||
'min_size': 16,
|
||||
'ratio': [0.5, 1, 2],
|
||||
'scale': [8, 16, 32],
|
||||
'pre_nms_topn': 6000,
|
||||
'post_nms_topn': 300,
|
||||
'nms_thresh': 0.7
|
||||
}
|
||||
cls.extract_proposal_params(node, defaults)
|
||||
return cls.enabled
|
||||
```
|
||||
|
||||
## See Also
|
||||
* [Customize_Model_Optimizer](Customize_Model_Optimizer.md)
|
||||
* [Legacy Mode for Caffe* Custom Layers](Legacy_Mode_for_Caffe_Custom_Layers.md)
|
||||
@@ -1,3 +1,476 @@
|
||||
# Extending Model Optimizer with New Primitives {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_Model_Optimizer_with_New_Primitives}
|
||||
# Extending the Model Optimizer with New Primitives {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_Model_Optimizer_with_New_Primitives}
|
||||
|
||||
This page is deprecated. Please, refer to [Model Optimizer Extensibility](Customize_Model_Optimizer.md) page for more information.
|
||||
This section explains how to register a custom layer in the Model Optimizer, including how to register Proposal as a custom layer. This section also demonstrates how `Proposal` works as a custom layer.
|
||||
|
||||
Model Optimizer loads the model, goes through the topology, and tries to find each layer type in the list of known layers. If the Model Optimizer does not find a layer in that list, it looks for the layer in the list of custom layers. If the Model Optimizer fails to find the layer among the defined custom layers, it registers a Caffe\* fallback for the output shape inference. If the Model Optimizer does not find Caffe and cannot infer shapes, the Model Optimizer fails with an appropriate message.
|
||||
|
||||
You must know two things about custom layers with the Model Optimizer:
|
||||
|
||||
* How to map a subgraph in a FW model to a subgraph consisting of Inference Engine layers. For Caffe, the subgraph is a 1-to-1 mapping of a Caffe layer to an Inference Engine layer.
|
||||
* How to infer shapes for unknown subgraphs. This can be either for a step in which the internal representation consists of framework-specific layers, or for a step in which the internal representation consists of Inference Engine layers.
|
||||
|
||||
You also have the option of a framework fallback for unknown subgraphs, for when the original framework is used for inference of output shapes of operations. The example below demonstrates the case in which the framework is not available or should not be used.
|
||||
|
||||
## Preparing an Example Topology
|
||||
|
||||
> **NOTE**: Skip this section if you have a topology with a layer that is not known to the Model Optimizer.
|
||||
|
||||
The information in this section prepares a Caffe\* model with the provided, deployment-ready `prototxt` for a
|
||||
well-known topology called
|
||||
[Faster-R-CNN prototxt](https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/models/pascal_voc/VGG16/faster_rcnn_end2end/test.prototxt)
|
||||
to demonstrate the workflow. To use this example, you must have
|
||||
[weights and biases](http://dl.dropboxusercontent.com/s/o6ii098bu51d139/faster_rcnn_models.tgz?dl=0) for inference,
|
||||
because `prototxt` just describes the structure of the topology.
|
||||
|
||||
1. Download the `.caffemodel` and `.prototxt` files
|
||||
2. Run the Model Optimizer on the `.caffemodel` and `.prototxt` files:
|
||||
```shell
|
||||
python mo.py --input_model VGG16_faster_rcnn_final.caffemodel --input_proto test.prototxt
|
||||
```
|
||||
You will likely see the error message:
|
||||
```shell
|
||||
Error parsing text-format caffe.NetParameter: 196:16: Message type "caffe.DropoutParameter" has no field named "scale_train".
|
||||
```
|
||||
Whether you see the error depends on your Caffe version. For example, BVLC Caffe does not support the boolean parameter `scale_train` for the `dropout` layer. The error message does not matter, because the dropout layer is needed only for training, and the Model Optimizer removes it.
|
||||
3. To proceed, comment out these lines in `test.prototxt`:
|
||||
```sh
|
||||
...
|
||||
layer {
|
||||
name: "drop6"
|
||||
type: "Dropout"
|
||||
bottom: "fc6"
|
||||
top: "fc6"
|
||||
dropout_param {
|
||||
dropout_ratio: 0.5
|
||||
# scale_train: false # <-------------- comment out this line
|
||||
}
|
||||
}
|
||||
...
|
||||
layer {
|
||||
name: "drop7"
|
||||
type: "Dropout"
|
||||
bottom: "fc7"
|
||||
top: "fc7"
|
||||
dropout_param {
|
||||
dropout_ratio: 0.5
|
||||
# scale_train: false # <-------------- comment out this line
|
||||
}
|
||||
}
|
||||
...
|
||||
```
|
||||
4. Run the Model Optimizer on this model again:
|
||||
```shell
|
||||
python mo.py --input_model VGG16_faster_rcnn_final.caffemodel --input_proto test.prototxt
|
||||
```
|
||||
You get the model successfully converted to Intermediate Representation, and you can infer it with the Inference Engine.
|
||||
|
||||
However, the aim of this tutorial is to demonstrate the way of supporting custom layers not yet supported by the Model Optimizer.
|
||||
If you want to understand better how Model Optimizer works, remove the extension for layer `Proposal` and follow all steps of this tutorial.
|
||||
|
||||
5. Remove the extension for layer `Proposal`:
|
||||
```sh
|
||||
mkdir extensions/old
|
||||
mv extensions/front/caffe/proposal_python_ext.py extensions/old/proposal_python_ext_old.py
|
||||
mv extensions/ops/proposal_python_example.py extensions/old/proposal_python__example_old.py
|
||||
```
|
||||
6. Now you can run the Model Optimizer on this model once again:
|
||||
```sh
|
||||
python mo.py --input_model VGG16_faster_rcnn_final.caffemodel --input_proto test.prototxt
|
||||
```
|
||||
You will see the message:
|
||||
```shell
|
||||
[ ERROR ] Found custom layer proposal. Model Optimizer does not support this layer.
|
||||
Please, register it in CustomLayersMapping.xml or implement extension.
|
||||
For more information please refer to Model Optimizer FAQ, question #FAQ45.
|
||||
```
|
||||
This message means the Model Optimizer can load the model, but is unable to infer the shape and handle the custom layer properties.
|
||||
|
||||
## Registering a Custom Layer as a Model Optimizer Extension
|
||||
|
||||
In the following sections, you will learn how to make the Model Optimizer independent from Caffe\* when processing a
|
||||
model that has a custom layer. In this example, the custom layer is referred to as the Proposal layer.
|
||||
|
||||
Use this section to implement the mapping rules for the `Proposal` layer attributes and the output shape calculation. As part of these steps, you must first create a class for the `Proposal` layer and inherit it from general-purpose Op that defines the interface of every new custom layer.
|
||||
|
||||
In this section, it is important to understand the `Op` class and its function. The implementation of this class shows that it expects a graph and attributes to be passed when initializing. The graph and attributes are in `<INSTALL_DIR>/deployment_tools/model_optimizer/mo/ops/op.py`
|
||||
|
||||
`Op` keeps the attributes for each operation and contains logic for handling node creation for internal model representation. `Op` is responsible for dumping each particular operation to the `.xml` format for the Intermediate Representation. By inheriting from it, the technical items are complete and you concentrate on the specificity of this layer: the attributes it supports and the rules on computing its output shape.
|
||||
|
||||
Follow these steps:
|
||||
|
||||
1. Create the file `python_proposal.py` in the directory `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/ops`:
|
||||
```python
|
||||
from mo.ops.op import Op
|
||||
class PythonProposalOp(Op):
|
||||
pass
|
||||
```
|
||||
2. Define the name of the operation and make a stub constructor:
|
||||
```python
|
||||
from mo.ops.op import Op
|
||||
class PythonProposalOp(Op):
|
||||
op = 'Proposal'
|
||||
def __init__(self, graph, attrs):
|
||||
super().__init__(graph)
|
||||
```
|
||||
3. Every `Op` must have three specific fields defined: `type`, `op`, and `infer`. In most cases, the `type` and `op` names are the same, and `infer` is defined as a function to compute the output shape. Reflect these fields in your constructor:
|
||||
```python
|
||||
from mo.ops.op import Op
|
||||
class PythonProposalOp(Op):
|
||||
op = 'Proposal'
|
||||
def __init__(self, graph, attrs):
|
||||
mandatory_props = {
|
||||
'type': __class__.op,
|
||||
'op': __class__.op,
|
||||
'infer': None
|
||||
}
|
||||
super().__init__(graph, mandatory_props, attrs)
|
||||
```
|
||||
According to the Intermediate Representation catalog, Proposal layer has the following attributes:
|
||||
|
||||
* `pre_nms_topn`
|
||||
* `post_nms_topn`
|
||||
* `nms_thresh`
|
||||
* `feat_stride`
|
||||
* `min_size`
|
||||
* `base_size`
|
||||
* `ratio`
|
||||
* `scale`
|
||||
4. In defining supported attribute names, it is best to use the same names as in the original models. The names are similar to parameters and have no connection with the model layer properties. For clarity, you can use the name `my_ratio` for `ratio`. Other than defining the list of supported parameters, you can define only the parameters that appear in the Intermediate Representation in the `backend_attrs` method.
|
||||
Define your attributes:
|
||||
```python
|
||||
class PythonProposalOp(Op):
|
||||
# ... constructor
|
||||
def supported_attrs(self):
|
||||
return [
|
||||
'pre_nms_topn',
|
||||
'post_nms_topn',
|
||||
'nms_thresh',
|
||||
'feat_stride',
|
||||
'min_size',
|
||||
'base_size',
|
||||
'ratio',
|
||||
'scale'
|
||||
]
|
||||
```
|
||||
5. Model Optimizer now knows how to create the layer called Proposal when it is in the topology and what attributes this layer has. However, the Model Optimizer does not know how to calculate the output shape of this operation. Define a rule to calculate the output shape:
|
||||
```python
|
||||
import numpy as np
|
||||
from mo.graph.graph import Node
|
||||
from mo.ops.op import Op
|
||||
class PythonProposalOp(Op):
|
||||
def __init__(self, graph, attrs):
|
||||
mandatory_props = {
|
||||
'type': __class__.op,
|
||||
'op': __class__.op,
|
||||
'infer': PythonProposalOp.calculate_output_shape
|
||||
}
|
||||
super().__init__(graph, mandatory_props, attrs)
|
||||
# ... supported attrs
|
||||
@staticmethod
|
||||
def calculate_output_shape(node: Node):
|
||||
node.out_node().shape = (1, 1, 1, 1) # any Proposal now has always the same output
|
||||
```
|
||||
6. According to the Intermediate Representation catalog, Proposal layer has the following output calculation formula, where shape dynamically depends on the `post_nms_topn` parameter.
|
||||
Implement the output calculation formula in Python\*:
|
||||
```python
|
||||
import numpy as np
|
||||
class PythonProposalOp(Op):
|
||||
# ... static fields
|
||||
# ... constructor
|
||||
# ... supported attrs
|
||||
@staticmethod
|
||||
def calculate_output_shape(node: Node):
|
||||
input_shape = node.in_node(0).shape
|
||||
out_shape = np.array([0, 0], dtype=np.int64)
|
||||
# rois blob: holds R regions of interest, each is a 5 - tuple
|
||||
# (n, x1, y1, x2, y2) specifying an image batch index n and a
|
||||
# rectangle(x1, y1, x2, y2)
|
||||
out_shape[0] = input_shape[0] * node.post_nms_topn
|
||||
out_shape[1] = 5
|
||||
node.out_node(0).shape = out_shape
|
||||
```
|
||||
The node does not contain this parameter yet, because it, like the other parameters, should be initialized in the constructor. The Inference Engine contains the implementation of a Caffe\*-like Proposal layer and works well with the default values from `caffe.proto`:
|
||||
```
|
||||
// Message that stores parameters used by ProposalLayer message ProposalParameter { optional uint32 feat_stride = 1 [default = 16]; optional uint32 base_size = 2 [default = 16]; optional uint32 min_size = 3 [default = 16]; repeated float ratio = 4; repeated float scale = 5; optional uint32 pre_nms_topn = 6 [default = 6000]; optional uint32 post_nms_topn = 7 [default = 300]; optional float nms_thresh = 8 [default = 0.7]; }
|
||||
```
|
||||
7. Change the constructor as follows:
|
||||
```python
|
||||
class PythonProposalOp(Op):
|
||||
# ... static fields
|
||||
def __init__(self, graph, attrs):
|
||||
mandatory_props = {
|
||||
'type': __class__.op,
|
||||
'op': __class__.op,
|
||||
'feat_stride': 16,
|
||||
'base_size': 16,
|
||||
'min_size': 16,
|
||||
'ratio': [0.5, 1, 2],
|
||||
'scale': [8, 16, 32],
|
||||
'pre_nms_topn': 6000,
|
||||
'post_nms_topn': 300,
|
||||
'nms_thresh': 0.7,
|
||||
'infer': PythonProposalOp.calculate_output_shape
|
||||
}
|
||||
super().__init__(graph, mandatory_props, attrs)
|
||||
# ... supported attrs
|
||||
# ... calculate output shape
|
||||
|
||||
```
|
||||
|
||||
It is mandatory to call two functions right after the implementation of that class:
|
||||
|
||||
```
|
||||
class ProposalPythonOp(Op):
|
||||
...
|
||||
|
||||
register_caffe_python_extractor(ProposalPythonOp, 'rpn.proposal_layer.ProposalLayer')
|
||||
Op.excluded_classes.append(ProposalPythonOp)
|
||||
```
|
||||
|
||||
Note that the first call <code>register_caffe_python_extractor(ProposalPythonOp, 'rpn.proposal_layer.ProposalLayer')</code> registers the extension of the layer in the Model Optimizer that will be found by a specific name (it is mandatory to join module name and layer name): <code>'rpn.proposal_layer.ProposalLayer'</code>.
|
||||
|
||||
The second call prevents the Model Optimizer from using this extension as if it is an extension for a layer with type `Proposal`. Otherwise, this layer can be chosen as an implementation of extension that can lead to potential issues.
|
||||
|
||||
**Summary**
|
||||
|
||||
In this section you implemented support for a custom layer with type `Python` that is `Proposal` layer in the topology. You learned how to calculate output shape of this layer.
|
||||
|
||||
The values of attributes are hardcoded, and in the next section you will learn how to extract these values from original framework model (Caffe model in this case).
|
||||
|
||||
## Registering Rules to Pass Extension Layer Properties from a Caffe\* Model to the Intermediate Representation
|
||||
|
||||
Model Optimizer now knows how to set the shape of the `PythonProposalOp` operation, but it is incorrect to initialize attributes with same values for every operation. Instead, the values should be extracted from the original topology. Model Optimizer does not know how to map the custom layer properties to the `PythonProposalOp`. For this, you must register the `FrontExtractorOp` instance.
|
||||
|
||||
> **NOTE**: This step is required only if the layer requires parameters from the original model.
|
||||
|
||||
1. Remove the calls to `register_caffe_python_extractor` and `Op.excluded_classes.append` from the file with `op`, because you will implement the extraction of attributes from prototxt yourself.
|
||||
There are multiple types of layers in Caffe: for example, `Convolution` and `Pooling`. Also, there is a specific type for custom Python\* layers called `Python`. Therefore, it is necessary to distinguish between those 'usual' types of layers and custom ones. If you want to implement extensions for a layer with type different to `Python`, you need to inherit your class of operation (for example, `ProposalFrontExtractor`) from `FrontExtractorOp`. Otherwise, inherit your class of operation from `CaffePythonFrontExtractorOp`.
|
||||
2. Create a file `python_proposal_ext.py` in the folder `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/caffe`
|
||||
```py
|
||||
from mo.front.extractor import CaffePythonFrontExtractorOp
|
||||
class PythonProposalFrontExtractor(CaffePythonFrontExtractorOp):
|
||||
pass
|
||||
```
|
||||
For other layer types, inherit from `FrontExtractorOp`:
|
||||
```py
|
||||
from mo.front.extractor import FrontExtractorOp
|
||||
class ProposalFrontExtractor(FrontExtractorOp):
|
||||
pass
|
||||
```
|
||||
You will implement an extractor for a layer with type `Python`; however, the steps are generally the same for layers with other types.
|
||||
3. Specify the operation that the extractor refers to and a specific flag. The flag represents whether the operation should be used by the Model Optimizer or should be excluded from processing:
|
||||
```py
|
||||
from mo.front.extractor import CaffePythonFrontExtractorOp
|
||||
class PythonProposalFrontExtractor(CaffePythonFrontExtractorOp):
|
||||
op = 'rpn.proposal_layer.ProposalLayer'
|
||||
enabled = True
|
||||
```
|
||||
4. Register a mapping rule between the original model and the `PythonProposalOp` attributes by overriding the following function:
|
||||
```py
|
||||
from mo.front.extractor import CaffePythonFrontExtractorOp
|
||||
from mo.ops.op import Op
|
||||
class ProposalPythonFrontExtractor(CaffePythonFrontExtractorOp):
|
||||
op = 'rpn.proposal_layer.ProposalLayer'
|
||||
enabled = True
|
||||
@staticmethod
|
||||
def extract(node):
|
||||
proto_layer = node.pb
|
||||
param = proto_layer.python_param # each layer has a specific parameter, take a look at caffe.proto
|
||||
python_params = str(param.param_str) # for Python layers, all params are in param_str
|
||||
attrs = {
|
||||
'feat_stride': int(python_params.split(':')[-1])
|
||||
}
|
||||
# update the attributes of the node
|
||||
Op.get_op_class_by_name('Proposal').update_node_stat(node, attrs) # <------ here goes the name ('Proposal') of the Operation that was implemented before
|
||||
return __class__.enabled
|
||||
```
|
||||
> **NOTE:** If you implement an extension for a layer with a type other than `Python`, change the following line: <code>Op.get_op_class_by_name('Proposal').update_node_stat(node, attrs)</code> to this line: <code>Op.get_op_class_by_name(__class__.op).update_node_stat(node, attrs)</code>.
|
||||
You have successfully extracted the parameter `feat_stride` from `prototxt`, assuming it is the only parameter in this layer.
|
||||
5. To increase the implementation flexibility:
|
||||
```py
|
||||
from mo.front.extractor import CaffePythonFrontExtractorOp
|
||||
from mo.ops.op import Op
|
||||
class PythonProposalFrontExtractor(CaffePythonFrontExtractorOp):
|
||||
op = 'rpn.proposal_layer.ProposalLayer'
|
||||
enabled = True
|
||||
@staticmethod
|
||||
def extract(node):
|
||||
param = node.pb.python_param
|
||||
attrs = CaffePythonFrontExtractorOp.parse_param_str(param.param_str)
|
||||
Op.get_op_class_by_name('Proposal').update_node_stat(node, attrs)
|
||||
return ProposalPythonFrontExtractor.enabled
|
||||
```
|
||||
|
||||
You can successfully convert the model. Open the `.xml` file and view your code:
|
||||
```xml
|
||||
...
|
||||
<layer id="42" name="proposal" precision="FP32" type="Python">
|
||||
<data base_size="16" feat_stride="16" min_size="16" nms_thresh="0.7" post_nms_topn="300" pre_nms_topn="6000" ratio="[0.5, 1, 2]" scale="[8, 16, 32]"/>
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>1</dim>
|
||||
<dim>18</dim>
|
||||
<dim>15</dim>
|
||||
<dim>15</dim>
|
||||
</port>
|
||||
<port id="1">
|
||||
<dim>1</dim>
|
||||
<dim>36</dim>
|
||||
<dim>15</dim>
|
||||
<dim>15</dim>
|
||||
</port>
|
||||
<port id="2">
|
||||
<dim>1</dim>
|
||||
<dim>3</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="3">
|
||||
<dim>300</dim>
|
||||
<dim>5</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
...
|
||||
```
|
||||
|
||||
Look at the output shape of the custom layer you implemented. The shape was calculated according to the rules specified in `PythonProposalOp`. The `ratio` and `scale` properties have the values `[0.5, 1, 2]` and `[8, 16, 32]`. They have square brackets because they are originally a repeated parameter. You converted the parameter to a list in `PythonProposalOp`. Model Optimizer casts the value to a string. According to Python\* rules, a list has a string representation of opening and closing square brackets and values joined by commas.
|
||||
|
||||
This is not a valid notation for the Intermediate Representation specification, because repeated parameters must be separated by a comma but without the brackets. Therefore, you must override the Model Optimizer default behavior regarding how it handles those parameters during the Intermediate Representation emitting stage, after the optimizations are complete. To do so, implement `backend_attrs()` in the `PythonProposalOp` class:
|
||||
```python
|
||||
class PythonProposalOp(Op):
|
||||
... other methods
|
||||
def backend_attrs(self) -> list:
|
||||
"""
|
||||
Gets list of attributes that should appear in resulting IR
|
||||
Returns:
|
||||
list of attributes names or list of tuples (name of attribute, pre-processing rule)
|
||||
"""
|
||||
return [
|
||||
( # a tuple per attribute
|
||||
'ratio', # name of attribute
|
||||
# pre-processing rule in a form of lambda
|
||||
# lambda takes a PythonProposalOp node with all defined properties
|
||||
# it translates [1,2,3] -> "1,2,3"
|
||||
lambda node: ','.join(map(str, node['ratio']))
|
||||
),
|
||||
(
|
||||
'scale',
|
||||
lambda node: ','.join(map(str, node['scale']))
|
||||
),
|
||||
'feat_stride',
|
||||
'base_size',
|
||||
'min_size',
|
||||
'pre_nms_topn',
|
||||
'post_nms_topn',
|
||||
'nms_thresh'
|
||||
]
|
||||
```
|
||||
The model can now be successfully converted.
|
||||
|
||||
Open the `.xml` file. `ratio` and `scale` have the expected correct values `0.5,1,2` and `8,16,32`:
|
||||
```xml
|
||||
...
|
||||
|
||||
<layer id="33" name="proposal" precision="FP32" type="Python">
|
||||
<data base_size="16" feat_stride="16" min_size="16" nms_thresh="0.7" post_nms_topn="300" pre_nms_topn="6000" ratio="0.5,1,2" scale="8,16,32"/>
|
||||
<input>
|
||||
...
|
||||
</input>
|
||||
<output>
|
||||
...
|
||||
</output>
|
||||
</layer>
|
||||
|
||||
...
|
||||
```
|
||||
|
||||
> **NOTE**: Model Optimizer supports the Faster-R-CNN topology. Run the following command for the same Intermediate Representation:
|
||||
|
||||
```sh
|
||||
python mo.py --input_model VGG16_faster_rcnn_final.caffemodel --input_proto test.prototxt --extensions <INSTALL_DIR>/deployment_tools/inference-engine/samples/object_detection_sample/fasterrcnn_extensions
|
||||
```
|
||||
|
||||
**Summary**
|
||||
|
||||
In this section you learned how to:
|
||||
|
||||
1. Create a framework-independent extension implementation of the Intermediate Representation custom layer with unified logic for calculating output shapes, specified set of attributes
|
||||
2. Use the Framework-Specific property extractor to map original model custom layer properties to the expected properties of the Framework-Independent extension
|
||||
3. Manipulate the custom layer properties representation in the resulting Intermediate Representation
|
||||
|
||||
Files used in this section:
|
||||
|
||||
* `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/ops/python_proposal.py`:
|
||||
|
||||
```py
|
||||
import networkx as nx
|
||||
import numpy as np
|
||||
from mo.front.extractor import attr_getter
|
||||
from mo.graph.graph import Node
|
||||
from mo.ops.op import Op
|
||||
|
||||
class ProposalOp(Op):
|
||||
op = 'Proposal'
|
||||
|
||||
def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
|
||||
mandatory_props = {
|
||||
'type': __class__.op,
|
||||
'op': __class__.op,
|
||||
'post_nms_topn': 300, # default in caffe-shared
|
||||
'infer': ProposalOp.proposal_infer
|
||||
}
|
||||
super().__init__(graph, mandatory_props, attrs)
|
||||
|
||||
def supported_attrs(self):
|
||||
return [
|
||||
'feat_stride',
|
||||
'base_size',
|
||||
'min_size',
|
||||
'ratio',
|
||||
'scale',
|
||||
'pre_nms_topn',
|
||||
'post_nms_topn',
|
||||
'nms_thresh'
|
||||
]
|
||||
|
||||
def backend_attrs(self):
|
||||
return [
|
||||
'feat_stride',
|
||||
'base_size',
|
||||
'min_size',
|
||||
('ratio', lambda node: attr_getter(node, 'ratio')),
|
||||
('scale', lambda node: attr_getter(node, 'scale')),
|
||||
'pre_nms_topn',
|
||||
'post_nms_topn',
|
||||
'nms_thresh',
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def proposal_infer(node: Node):
|
||||
input_shape = node.in_node(0).shape
|
||||
out_shape = np.array([0, 0], dtype=np.int64)
|
||||
# rois blob: holds R regions of interest, each is a 5 - tuple
|
||||
# (n, x1, y1, x2, y2) specifying an image batch index n and a
|
||||
# rectangle(x1, y1, x2, y2)
|
||||
out_shape[0] = input_shape[0] * node.post_nms_topn
|
||||
out_shape[1] = 5
|
||||
node.out_node(0).shape = out_shape
|
||||
```
|
||||
* `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/caffe/python_proposal_ext.py`:
|
||||
|
||||
```py
|
||||
from mo.front.extractor import CaffePythonFrontExtractorOp
|
||||
from mo.ops.op import Op
|
||||
|
||||
class ProposalPythonFrontExtractor(CaffePythonFrontExtractorOp):
|
||||
op = 'rpn.proposal_layer.ProposalLayer'
|
||||
enabled = True
|
||||
|
||||
@staticmethod
|
||||
def extract(node):
|
||||
param = node.pb.python_param
|
||||
attrs = CaffePythonFrontExtractorOp.parse_param_str(param.param_str)
|
||||
Op.get_op_class_by_name('Proposal').update_node_stat(node, attrs)
|
||||
return ProposalPythonFrontExtractor.enabled
|
||||
```
|
||||
|
||||
@@ -1,23 +1,10 @@
|
||||
# Legacy Mode for Caffe* Custom Layers {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Legacy_Mode_for_Caffe_Custom_Layers}
|
||||
|
||||
> **NOTE**: This functionality is deprecated and will be removed in the future releases.
|
||||
> **NOTE**: This functionality is deprecated and will be removed in future releases.
|
||||
|
||||
Model Optimizer can register custom layers in a way that the output shape is calculated by the Caffe\* framework
|
||||
installed on your system. This approach has several limitations:
|
||||
Model Optimizer can register custom layers in a way that the output shape is calculated by the Caffe\* framework installed on your system. This chapter covers this option.
|
||||
|
||||
* If your layer output shape depends on dynamic parameters, input data or previous layers parameters, calculation of
|
||||
output shape of the layer via Caffe can be incorrect. For example, `SimplerNMS` is filtering out bounding boxes that do
|
||||
not satisfy the condition. Internally, Caffe fallback forwards the whole net without any meaningful data - just some
|
||||
noise. It is natural to get only one bounding box (0,0,0,0) instead of expected number (for example, 15). There is an
|
||||
option to patch Caffe accordingly; however, this makes successful Intermediate Representation generation dependent on the patched
|
||||
Caffe on the particular machine. To keep the solution independent from Caffe, we recommend using the extensions mechanism
|
||||
for such layers described in the [Model Optimizer Extensibility](Customize_Model_Optimizer.md).
|
||||
* It is not possible to produce Intermediate Representation on a machine that does not have Caffe installed.
|
||||
|
||||
> **NOTE**: Caffe Python\* API has an issue when layer name does not correspond to the name of its top. The fix was
|
||||
> implemented on [BVLC Caffe\*](https://github.com/BVLC/caffe/commit/35a7b87ad87457291dfc79bf8a7e7cf7ef278cbb). The
|
||||
> Caffe framework on your computer must contain this fix. Otherwise, Caffe framework can unexpectedly fail during the
|
||||
> fallback procedure.
|
||||
> **NOTE**: Caffe Python\* API has an issue when layer name does not correspond to the name of its top. The fix was implemented on [BVLC Caffe\*](https://github.com/BVLC/caffe/commit/35a7b87ad87457291dfc79bf8a7e7cf7ef278cbb). The Caffe framework on your computer must contain this fix. Otherwise, Caffe framework can unexpectedly fail during the fallback procedure.
|
||||
|
||||
> **NOTE**: The Caffe fallback feature was validated against [this GitHub revision](https://github.com/BVLC/caffe/tree/99466224dac86ddb86296b1e727794fb836bd80f). You may have issues with forks or later Caffe framework versions.
|
||||
|
||||
@@ -38,8 +25,7 @@ Where:
|
||||
|
||||
**Example**:
|
||||
|
||||
1. `Proposal` layer has parameters, and they appear in the Intermediate Representation. The parameters are stored in
|
||||
the `proposal_param` property of the layer:
|
||||
1. `Proposal` layer has parameters, and they appear in the Intermediate Representation. The parameters are stored in the `proposal_param` property of the layer:
|
||||
```shell
|
||||
\<CustomLayer NativeType="Proposal" hasParam ="true" protoParamName = "proposal_param"/\>
|
||||
```
|
||||
@@ -48,6 +34,16 @@ the `proposal_param` property of the layer:
|
||||
\<CustomLayer NativeType="CustomLayer" hasParam ="false"/\>
|
||||
```
|
||||
|
||||
For this feature, you need an appropriate version of Caffe installed on the computer on which you run the Model Optimizer.
|
||||
|
||||
## Constraints of Using the Caffe Fallback
|
||||
|
||||
Several layers in the Caffe\* framework can have shapes that dynamically depend on the input data, not only on the layers that precede the layer and on its parameters. For example, `SimplerNMS` filters out bounding boxes that do not satisfy the condition. Internally, the Caffe fallback forwards the whole net without any meaningful data - just some noise. It is natural to get only one bounding box (0,0,0,0) instead of the expected number (for example, 15). There is an option to patch Caffe accordingly; however, this makes successful Intermediate Representation generation dependent on the patched Caffe being present on the particular machine. To keep the solution independent from Caffe, we recommend using the extensions mechanism for such layers.
|
||||
|
||||
Known cases like `Proposal`, `DetectionOutput`, `SimplerNMS` are implemented as extensions and can be used out of the box.
|
||||
|
||||
A detailed description of supported layers is in the [Operations Specification](../../../ops/opset.md) document.
|
||||
|
||||
## Building Caffe\*
|
||||
|
||||
1. Build Caffe\* with Python\* 3.5:
|
||||
@@ -72,4 +68,4 @@ python3
|
||||
import caffe
|
||||
```
|
||||
|
||||
If Caffe was installed correctly, the `caffe` module is imported without errors.
|
||||
If Caffe was installed correctly, the `caffe` module is imported without errors.
|
||||
@@ -1,4 +1,363 @@
|
||||
# Sub-Graph Replacement in the Model Optimizer {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Subgraph_Replacement_Model_Optimizer}
|
||||
|
||||
The document has been deprecated. Refer to the [Model Optimizer Extensibility](Customize_Model_Optimizer.md)
|
||||
for the up-to-date documentation.
|
||||
Several reasons exist for why the Model Optimizer could not generate an Intermediate Representation for a model. However, in some cases, the Intermediate Representation could be generated after providing certain hints to the tool. The examples of hints below are mostly related to TensorFlow\*, but potentially could be actual for models created in any framework:
|
||||
|
||||
* Topology contains an operation (or a sub-graph of operations) not known to Model Optimizer, but this operation (sub-graph) could be expressed as a combination of known operations. A hint would be a description of this combination to the tool.
|
||||
* Sub-graph of operations in the topology expresses a single layer known to Inference Engine.
|
||||
* TensorFlow and Inference Engine use different layouts of tensors, NHWC and NCHW respectively. If some tensor in NHWC layout is flattened (for example, all the dimensions are squashed into single dim), it is not possible to convert it to NCHW layout required for Inference Engine, so Model Optimizer cannot produce correct Intermediate Representation.
|
||||
|
||||
The detailed solutions for the examples above are given later, the next subsection shows what is common in all three examples.
|
||||
|
||||
## Sub-graph Replacement
|
||||
|
||||
In these cases, the sub-graph (or a single node) of initial graph is replaced with a new sub-graph (single node). The sub-graph replacement consists of the following steps:
|
||||
|
||||
1. Identify an existing sub-graph for replacement
|
||||
|
||||
2. Generate a new sub-graph
|
||||
|
||||
3. Connect a new sub-graph to the graph (create input/output edges to the new sub-graph)
|
||||
|
||||
4. Create output edges out of a new sub-graph to the graph
|
||||
|
||||
5. Do something with the original sub-graph (for example, remove it)
|
||||
|
||||
Model Optimizer provides several ways to perform most of the sub-graph replacement steps. The next subsections describe these methods.
|
||||
|
||||
## Replace a Single Operation with a Sub-graph of Operations
|
||||
|
||||
For example, there is an operation `SquaredDifference` in TensorFlow which calculates \f$(a - b)^2\f$, where \f$a\f$ and \f$b\f$ are input tensors. Inference Engine does not support such operation. However, `SquaredDifference` could be expressed using two `Power` operations and one `Eltwise Add`. The `Power` operation calculates \f$scale * (a ^ {power}) + shift\f$, where \f$a\f$ is a tensor and \f$scale\f$, \f$power\f$ and \f$shift\f$ are float values. The first `Power` operation negates the value of tensor \f$b\f$. The second one is used to square the result of \f$a + (- b)\f$ which is calculated using the `Eltwise Add` operation applied to tensor \f$a\f$ and tensor \f$-b\f$.
|
||||
|
||||
Given that, we can replace all `SquaredDifference` operations in the initial model with two `Power` and one `Eltwise` operations. The replacer is implemented in the following file `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/SquaredDifference.py`.
|
||||
```python
|
||||
import networkx as nx
|
||||
from mo.front.common.replacement import FrontReplacementOp
|
||||
from mo.graph.graph import Node
|
||||
from mo.ops.eltwise import Eltwise
|
||||
from mo.ops.power import Power
|
||||
class SquaredDifference(FrontReplacementOp):
|
||||
"""
|
||||
Example class illustrating how to implement replacement of a single op in the front-end of the MO pipeline.
|
||||
This class replaces a single op SquaredDifference by a sub-graph consisting of 3 lower-level ops.
|
||||
"""
|
||||
op = "SquaredDifference"
|
||||
enabled = True
|
||||
def replace_op(self, graph: nx.MultiDiGraph, node: Node):
|
||||
negate = Power(graph, dict(scale=-1, name=node.name + '/negate_'))
|
||||
add = Eltwise(graph, dict(operation='sum', name=node.name + '/add_'))
|
||||
squared = Power(graph, dict(power=2, name=node.name + '/squared_'))
|
||||
out_node = squared.create_node([add.create_node([node.in_node(0), negate.create_node([node.in_node(1)])])])
|
||||
# Replace edge from out port 0 of the matched node with a edge from node out_node.id with port 0.
|
||||
# The "explicit" version of the return value is: [(out_node.id, 0)])
|
||||
return [out_node.id]
|
||||
```
|
||||
Model Optimizer internal representation of the graph uses the networkx module.
|
||||
|
||||
**Key lines**:
|
||||
|
||||
* Line 1: Imports this module.
|
||||
|
||||
* Line 3: Imports class `FrontReplacementOp` that is used to replace operation of particular type with a new sub-graph. This class performs the first step of the sub-graph replacement (identifies an existing sub-graph for replacement). It is important to mention that the replacement happens before shape inference and creation of data nodes representing tensors with values. At this stage of model conversion pipeline, all nodes in the graph are operation nodes or nodes of type `Const` that produce tensor with fixed value embedded into the node.
|
||||
|
||||
* Line 4: Imports class `Node` representing a single node in the computation graph.
|
||||
|
||||
* Lines 5 - 6: Import classes representing operations `Power` and `Eltwise`. These classes are inherited from base class `mo.ops.Op` that represents operation and stores its attributes.
|
||||
|
||||
* Line 9: Defines class `SquaredDifference` inherited from `FrontReplacementOp`. This is a replacer class that is automatically registered and executed by Model Optimizer. Since the class is located in the common (not framework) specific directory `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front`, it is used for replacement for all supported frameworks.
|
||||
|
||||
* Line 15: Defines the class variable `op` that stores the name of the operation to be replaced. In this case, it is `SquaredDifference`.
|
||||
|
||||
* Line 16: Defines class variable `enabled` that controls whether the replacer is enabled or not. The only function that should be implemented in the class is `replace_op`. It gets graph to operate on and an instance of node of desired operation (`SquaredDifference` in this case). This function performs step two and three of the sub-graph replacement (generates a new sub-graph to replace with and connects a new sub-graph to the graph).
|
||||
|
||||
* Lines 19 - 21: Create instances of operations classes with required attributes.
|
||||
|
||||
* Line 23: Creates a sub-graph from the operations defined above. The `create_node` method of the `Op` class generates `Node` from the `Op` and uses single mandatory argument - the list of input nodes (represented as instances of `Node` class) to create input edges to the node being generated. Inputs of the `SquaredDifference` node are retrieved using `node.in_node(0)` and `node.in_node(1)` method calls. The `Eltwise Add` node gets first input as initial first input of `SquaredDifference` node, the second input of `add` is the result of negation of the second input of `SquaredDifference` node: `[add.create_node([node.in_node(0), negate.create_node([node.in_node(1)])])]`. Then the result of `Add` node is squared. `out_node` node performs this calculation.
|
||||
|
||||
The `replace_op` function returns a list of node names used to create output edges of the sub-graph to connect it with the rest of the graph. Each element of the list describes mapping between old output edge of the matched node and new sub-graph node and output edge index. The i-th element of the list corresponds to the i-th output tensor of the matched node. In this case, `SquaredDifference` produces single tensor through output port 0, so the returned list contains single element. In general, each element is a tuple, where the first element is the name of a new node producing required tensor and the second is the output port for that tensor. If the output port is 0, it is possible to use shortcut - just the name of the node instead of a tuple. Line 26 uses this shortcut. The returned value is used to create the new sub-graph output edges (step 4 of the sub-graph replacement).
|
||||
|
||||
Default implementation of the `FrontReplacementOp` class removes matched node and all its input/output edges (step 5 of the sub-graph replacement).
|
||||
|
||||
Another example of such kind of replacement is in the `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/Sub.py` class where all instances of `Sub` operations are replaced with two operations: `Power` to negate the second argument and the `Eltwise` to perform elementwise add.
|
||||
|
||||
## Replace Sub-graph of Operations with a New Sub-graph of Operations
|
||||
|
||||
The previous example considered situation when one single node of a specific type is replaced. When it is necessary to replace a sub-graph of operations it is necessary to tell Model Optimizer how to identify this sub-graph. There are three ways to achieve that:
|
||||
|
||||
* Use graph isomorphism pattern of the networkx module
|
||||
|
||||
* Use nodes name pattern to identify `scope` (according to TensorFlow terminology) to be replaced
|
||||
|
||||
* Use sets of `start` and `end` node names to match all nodes "between" them
|
||||
|
||||
The next sections explain each option using real examples.
|
||||
|
||||
### Replace Sub-graph of Operations Using Graph Isomorphism Pattern <a name="replace-using-isomorphism-pattern"></a>
|
||||
|
||||
networkx Python\* module provides methods to find graph isomorphic to the given one using nodes and edges match: for example, `networkx.algorithms.isomorphism.categorical_node_match`, `networkx.algorithms.isomorphism.categorical_multiedge_match`. Model Optimizer uses these methods and provides simple API to use that feature.
|
||||
|
||||
For example, the Caffe\* has layer called [Mean-Variance Normalization (MVN)](http://caffe.berkeleyvision.org/tutorial/layers/mvn.html), which is also supported by the Inference Engine. This layer is implemented with low-level operations in TensorFlow: `Mean`, `StopGradient`, `SquaredDifference`, `Squeeze` and `FusedBatchNorm`. Model Optimizer should replace sub-graph with these operations with a single Inference Engine layer of type `MVN`.
|
||||
|
||||
The file `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/tf/mvn.py` performs such a replacement. The first part of the file is:
|
||||
```python
|
||||
class MVN(FrontReplacementSubgraph):
|
||||
enabled = True
|
||||
def pattern(self):
|
||||
log.debug('Enabled MVN replacement')
|
||||
return dict(
|
||||
nodes=[
|
||||
('mean', dict(op='Mean')),
|
||||
('stop_grad', dict(op='StopGradient')),
|
||||
('sqdiff', dict(op='SquaredDifference')),
|
||||
('variance', dict(op='Mean')),
|
||||
('squeeze_mean', dict(op='Squeeze')),
|
||||
('squeeze_variance', dict(op='Squeeze')),
|
||||
('fbn', dict(op='FusedBatchNorm')),
|
||||
],
|
||||
edges=[
|
||||
('mean', 'stop_grad', {'in': 0}),
|
||||
('stop_grad', 'sqdiff', {'in': 1}),
|
||||
('sqdiff', 'variance', {'in': 0}),
|
||||
('mean', 'squeeze_mean', {'in': 0}),
|
||||
('variance', 'squeeze_variance', {'in': 0}),
|
||||
('squeeze_mean', 'fbn', {'in': 3}),
|
||||
('squeeze_variance', 'fbn', {'in': 4}),
|
||||
],
|
||||
node_attrs=['op'],
|
||||
edge_attrs=['in'])
|
||||
```
|
||||
**Key lines**:
|
||||
|
||||
* Line 1: Defines class `MVN` inherited from class `FrontReplacementSubgraph` that performs sub-graph replacement using sub-graph isomorphism pattern.
|
||||
|
||||
* Line 3: Sets class variable `enabled` to value True meaning that this replacer is enabled.
|
||||
|
||||
* The function `pattern` defines the sub-graph constraints to be matched. It returns a dictionary with four keys:
|
||||
|
||||
* the `nodes` defines a list of nodes to be matched. Each element in the list is a tuple. The first element is the alias name assigned for the matched node, the second element is a dictionary with desired attributes of the node.
|
||||
|
||||
* the `edges` defines a list of edges to be matched. Each element in the list is a tuple. The first and the second elements are the start and end edge nodes alias names respectively. The third element is a dictionary with desired edge attributes.
|
||||
|
||||
* the `node_attrs` contains the names of nodes attributes to use during sub-graph isomorphism search.
|
||||
|
||||
* the `edge_attrs` contains the names of edges attributes to use during sub-graph isomorphism search.
|
||||
|
||||
The sub-graph is matched if all provided constraints are satisfied. If at least one node with desired attributes is missing or at least one defined edge is absent, the sub-graph is not matched.
|
||||
* Line 9: Adds a constraint that the sub-graph should contain a node with attribute `op` with value `Mean`. The matched node gets an alias name `mean`. In the same way, line 10 adds a constraint for the node `StopGradient`; the matched node gets an alias name `stop_grad`.
|
||||
|
||||
* Line 18: Defines edge from node with alias name `mean` to node with alias name `stop_grad` having attribute `in` equal to 0. This means that the output of node `mean` is connected to the node `stop_grad` as a first input (Model Optimizer uses zero-based indexing that is why `in` is 0). Another example of defining the edges constraints is in line 25 where the edge from `squeeze_mean` is connected to the `fbn` node as fourth input.
|
||||
|
||||
* Lines 26 - 27: Specify a list of attributes to be checked. In fact, these lists are just list of all keys in the dictionaries for node and edge attributes.
|
||||
|
||||
Now when the Model Optimizer knows how to find sub-graph (step 1 of the sub-graph replacement), it is necessary to implement function that will perform actual sub-graph replacement (step 2 and 3). The code for this function is:
|
||||
```python
|
||||
def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
|
||||
fbn = match['fbn']
|
||||
input = fbn.in_node(0)
|
||||
log.debug('Found potential MVN pattern after {} with name {}'.format(input.op, input.name))
|
||||
if input.id != match['mean'].in_node(0).id or input.id != match['sqdiff'].in_node(0).id:
|
||||
return
|
||||
log.debug('Confirmed MVN pattern after {} with name {}'.format(input.op, input.name))
|
||||
MVN = Op.get_op_class_by_name('MVN')
|
||||
mvn = MVN(graph, dict(
|
||||
name=fbn.name + '/MVN_',
|
||||
eps=fbn.eps,
|
||||
required_reduction_indices=[1,2] if fbn.data_format == b'NHWC' else [2,3]
|
||||
))
|
||||
mvn.attrs['old_infer'] = mvn.attrs['infer']
|
||||
mvn.attrs['infer'] = __class__.infer
|
||||
mul = Eltwise(graph, dict(operation='mul', name=fbn.name + '/Mul_'))
|
||||
add = Eltwise(graph, dict(operation='sum', name=fbn.name + '/Add_'))
|
||||
input_gamma = fbn.in_node(1)
|
||||
input_beta = fbn.in_node(2)
|
||||
mean_reduction = match['mean'].in_node(1)
|
||||
variance_reduction = match['mean'].in_node(1)
|
||||
new_subgraph = add.create_node([
|
||||
mul.create_node([
|
||||
mvn.create_node([input, mean_reduction, variance_reduction]),
|
||||
input_gamma
|
||||
]),
|
||||
input_beta
|
||||
])
|
||||
replace_node(fbn, new_subgraph)
|
||||
```
|
||||
The function accepts two arguments - the graph and the dictionary `match`. The keys in the dictionary are the alias names of matched nodes (defined in the `nodes` list in the function `pattern`) and the values are the matched node of the graph (the instance of Node object).
|
||||
|
||||
The function generates new sub-graph with node of type `MVN` and two nodes of the type `Eltwise` calculating sum and product. There is nothing interesting in how the graph is generated and mathematics behind that, so attention will be put to two aspects of this function.
|
||||
|
||||
The first one is the call to function `replace_node` in line 36. `FusedBatchNorm` node is replaced with the output node of the generated sub-graph: all input edges of the `FusedBatchNorm` node are re-connected to the `new_subgraph` node, all consumers of the `FusedBatchNorm` node are updated to get inputs from the `new_subgraph` node. This action connects newly generated sub-graph with an existing graph (step 4 of the sub-graph replacement).
|
||||
|
||||
The second one is that the default implementation of the inference function for `MVN` operation is overwritten. In line 16, the default implementation of the inference function for `MVN` is saved to attribute `old_infer`. In line 17, the new inference function is saved to the instance of the `MVN` operation class. The new inference function code looks as follows:
|
||||
```python
|
||||
@staticmethod
|
||||
def infer(node: Node):
|
||||
if not(node.in_node(1).has_valid('value') and node.in_node(2).has_valid('value')):
|
||||
log.warning('Reduction indices for mean and variance for MVN node {} are not constants'.format(node.name))
|
||||
return
|
||||
if not(all(node.in_node(1).value == node.required_reduction_indices) and
|
||||
all(node.in_node(2).value == node.required_reduction_indices)):
|
||||
log.warning('Reduction indices for mean {} and variance {} do not match required ones {}'.format(
|
||||
node.in_node(1).value,
|
||||
node.in_node(2).value,
|
||||
node.required_reduction_indices
|
||||
))
|
||||
return
|
||||
node.graph.remove_edge(node.in_node(1).id, node.id)
|
||||
node.graph.remove_edge(node.in_node(2).id, node.id)
|
||||
node.old_infer(node)
|
||||
```
|
||||
The `infer` function is needed to infer value of the node (if it is possible) and to infer shapes of the output tensors of the node (mandatory). The custom `infer` function performs additional checks that describe limitations of the `MVN` layer implementation in the Inference Engine. For example, reduction indices for mean and variance must be constants (line 10), while in TensorFlow they could be computed during model inference. In addition, the function removes two edges from the graph (lines 17 and 18) because all required information is already stored in the `MVN` node attributes. This is due to different `MVN` layer implementation in Inference Engine and TensorFlow\*: `mean` and `variance` are attributes of the node in Inference Engine while in TensorFlow they are input tensors. Edges are not removed in the `replace_sub_graph` function, because these edges are used in the `infer` function (lines 7-12).
|
||||
|
||||
The last action in the `infer` method (line 19) is to call default infer function for the `MVN`, which is saved in the attribute `old_infer` of the node to infer output tensors shapes.
|
||||
|
||||
On the step 5 of the sub-graph replacement, six matching nodes are automatically removed during the dead code elimination pass that is performed after applying the defined custom sub-graph replacements. The six matching nodes are no longer connected to the inputs of the network after replacing node `fbn` with a newly created sub-graph node. Since they are not marked as output nodes (using `--output` command line parameter), they could be removed.
|
||||
|
||||
The replacement works for all sub-graph isomorphism instances found in the network.
|
||||
|
||||
### Replace Sub-graph of Operations Using Nodes Name Pattern
|
||||
|
||||
TensorFlow uses a mechanism of scope to group related operation nodes. It is a good practice to put nodes performing particular task into the scope. This approach divides a graph into logical blocks that are easier to review in TensorBoard\*. The `scope`, in fact, just defines a common prefix for the node names in the scope.
|
||||
|
||||
For example, Inception topologies contain several types of so-called "Inception blocks". Some of them are exactly equal to each other, but located in different places of the network. For example, Inception V4 from `tensorflow.contrib.slim` module has inception blocks `Mixed_5b`, `Mixed_5c` and `Mixed_5d` with exactly the same nodes with the same attributes.
|
||||
|
||||
Now consider situation when someone implemented these Inception blocks extremely efficiently using single Inference Engine custom layer called `InceptionBlock` and would like to replace these blocks with instances of the layer to decrease inference time. Model Optimizer provides mechanism to replace sub-graph of operations defined by the regular expressions for the node names prefixes (scope). In this particular case, some of the patterns are: `.*InceptionV4/Mixed_5b`, `.*InceptionV4/Mixed_5c` and `.*InceptionV4/Mixed_5d`. Each pattern starts with `.*`, because a prefix `InceptionV4` is added to all nodes names during a model freeze.
|
||||
|
||||
The sub-graph replacement using nodes name pattern is a bit trickier than replacements of single operation and networkx isomorphism pattern described above. You should do the following additional steps in comparison with previously described replacements:
|
||||
|
||||
1. Prepare configuration file template defining node names patterns and information about custom layer attributes.
|
||||
|
||||
2. Run Model Optimizer with command line parameter to add information about input and output nodes of the specified sub-graphs.
|
||||
|
||||
Consider the following possible configuration file for the Inception Block replacer:
|
||||
```json
|
||||
[
|
||||
{
|
||||
"custom_attributes": {
|
||||
"attr1_key": "attr1_value",
|
||||
"attr2_key": 123456
|
||||
},
|
||||
"id": "InceptionBlockReplacer",
|
||||
"op": "InceptionBlock",
|
||||
"instances": [
|
||||
".*InceptionV4/Mixed_5b",
|
||||
".*InceptionV4/Mixed_5c",
|
||||
".*InceptionV4/Mixed_5d"
|
||||
],
|
||||
"match_kind": "scope"
|
||||
}
|
||||
]
|
||||
```
|
||||
The `.json` file contains list of dictionaries. Each dictionary defines one replacement. Each replacement is defined with several keys:
|
||||
|
||||
* `id` (mandatory) is a unique identifier of the replacer. It is used in the Python\* code that implements sub-graph replacement to link the class and the replacement description from the configuration file.
|
||||
|
||||
* `match_kind` (mandatory) is a string that specifies what matching algorithm is used. Currently, `scope` and `points` are supported. In this example, the first one is considered. The `points` match kind is described below.
|
||||
|
||||
* `instances` (mandatory) specifies instances of the sub-graph to be matched. It contains a list of node names prefixes patterns for the match kind `scope`.
|
||||
|
||||
* `custom_attributes` (optional) is a dictionary with static attributes of the layer to be dumped to Inference Engine Intermediate Representation `.xml` file.
|
||||
|
||||
* `op` (optional) is used only if the sub-graph replacement Python code is not needed, because the sub-graph should be replaced with a single node of type `op`. If this attribute is not set, it is necessary to implement Python code with sub-graph generation code. Both options are considered in this example.
|
||||
|
||||
When the configuration file is ready, run the Model Optimizer with regular command line parameters pointing to the file with model and input shapes (if necessary) and additional parameter `--tensorflow_custom_operations_config_update` pointing to the generated configuration file. If the file is correct, Model Optimizer adds two keys to the `InceptionBlockReplacer` dictionary: `inputs` and `outputs` with the following content:
|
||||
```json
|
||||
[
|
||||
{
|
||||
"id": "InceptionBlockReplacer",
|
||||
...
|
||||
"inputs": [
|
||||
[
|
||||
{
|
||||
"node": "Branch_2/Conv2d_0a_1x1/Conv2D$",
|
||||
"port": 0
|
||||
},
|
||||
{
|
||||
"node": "Branch_3/AvgPool_0a_3x3/AvgPool$",
|
||||
"port": 0
|
||||
},
|
||||
{
|
||||
"node": "Branch_1/Conv2d_0a_1x1/Conv2D$",
|
||||
"port": 0
|
||||
},
|
||||
{
|
||||
"node": "Branch_0/Conv2d_0a_1x1/Conv2D$",
|
||||
"port": 0
|
||||
}
|
||||
]
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"node": "concat$",
|
||||
"port": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
```
|
||||
The value for key `inputs` is a list of lists describing input tensors of the sub-graph. Each element of the top-level list corresponds to one unique input tensor of the sub-graph. Each internal list describes a list of nodes consuming this tensor and port numbers where the tensor is consumed. Model Optimizer generates regular expressions for the input nodes names to uniquely identify them in each instance of the sub-graph defined by the `instances`. Denote these nodes as input nodes of the sub-graph.
|
||||
|
||||
In the InceptionV4 topology, the `InceptionV4/Mixed_5b` block has four input tensors from outside of the sub-graph, but all of them are produced by the node `InceptionV4/Mixed_5a/concat`. Therefore, the top-level list of the `inputs` contains one list corresponding to this tensor. Four input nodes of the sub-graph consume the tensor produced by `InceptionV4/Mixed_5a/concat` node. In this case, all four input nodes consume input tensor into port 0.
|
||||
|
||||
The order of items in the internal list describing nodes does not matter, but the order of elements in the top-level list is important. This order defines the order in which the Model Optimizer attaches input tensors to a new generated node if the sub-graph is replaced with a single node. The i-th input node of the sub-graph is obtained using call `match.single_input_node(i)` in the sub-graph replacer code. More information about API is given below. If you need to change the order of input tensors, you can edit the configuration file in the text-editor.
|
||||
|
||||
The value for the key `outputs` is a list describing nodes of the sub-graph producing tensor that goes outside of the sub-graph or does not have child nodes. Denote these nodes as output nodes of the sub-graph. The order of elements in the list is important. The i-th element of the list describes the i-th output tensor of the sub-graph, which could be obtained using call `match.output_node(i)`. The order of elements can be manually changed in the configuration file. Model Optimizer uses this order to connect output edges if the sub-graph is replaced with a single node.
|
||||
|
||||
Now, when the meaning of the `inputs` and `outputs` attributes is clear, return to the replacer implementation. The replacer `InceptionBlockReplacer` contains attribute `op` with the value `InceptionBlock`, which means that the identified sub-graph should be replaced with a single layer of type `InceptionBlock`. This layer is not known to the Model Optimizer, so it is necessary to define it. See [Extending the Model Optimizer with New Primitives](Extending_Model_Optimizer_with_New_Primitives.md). You must create file `extension/ops/InceptionBlock.py` with the following content:
|
||||
```python
|
||||
import numpy as np
|
||||
from mo.graph.graph import Node
|
||||
from mo.ops.op import Op
|
||||
class InceptionBlock(Op):
|
||||
op = "InceptionBlock"
|
||||
enabled = True
|
||||
def __init__(self, graph, attrs):
|
||||
super().__init__(graph, attrs, {
|
||||
'type': __class__.op,
|
||||
'op': __class__.op,
|
||||
})
|
||||
```
|
||||
The shape inference function is not defined. In this case, Model Optimizer uses TensorFlow fallback to calculate shapes of the sub-graph output tensors.
|
||||
|
||||
Run the Model Optimizer with the regular command line parameters, path to the model file and input shape (if necessary), and the parameter `--tensorflow_use_custom_operations_config` and point to the created configuration file. Model Optimizer generates Intermediate Representation `.xml` file with three sequential layers of type `InceptionBlock` like in the following example:
|
||||
```xml
|
||||
<layer id="1658" name="InceptionBlock1877" precision="FP32" type="InceptionBlock">
|
||||
<input>
|
||||
<port id="0">
|
||||
<dim>1</dim>
|
||||
<dim>384</dim>
|
||||
<dim>35</dim>
|
||||
<dim>35</dim>
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="1">
|
||||
<dim>1</dim>
|
||||
<dim>384</dim>
|
||||
<dim>35</dim>
|
||||
<dim>35</dim>
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
```
|
||||
The implementation of the sub-graph replacement by scope with a single layer is complete. The next subsection explains
|
||||
how Model Optimizer replaces sub-graph identified by start/end nodes (`points`) with another sub-graph.
|
||||
|
||||
### <a name="sub_graph_replacement_using_points"></a> Replace Sub-graph of Operations Using Points
|
||||
In this scenario, for the matching algorithm user defines the sub-graph via a set of "start" and "end" nodes.
|
||||
Given the set, the Model Optimizer performs the following steps:
|
||||
1. Starts graph traversal from every _start_ node following the direction of the graph edges.
|
||||
The search stops in _end_ nodes or in case of nodes without further children. All visited nodes are added to the matched sub-graph.
|
||||
2. Starts another graph traversal from each non-start node of the sub-graph, i.e. every node except nodes from "start" set.
|
||||
In this step the edges are traversed in the opposite edge direction. All newly visited nodes are added to the
|
||||
matched sub-graph. This step is needed to add nodes required for calculation values of internal nodes of the
|
||||
matched sub-graph.
|
||||
3. Checks that all "end" nodes were reached from "input" nodes. If not, exits with an error.
|
||||
4. Checks that there are no "Placeholder" operations among added nodes. If it is not true then some side branch of
|
||||
the sub-graph (added in step 2) depends on inputs of the network. Such configuration is not correct so exit with error.
|
||||
|
||||
This algorithm finds all nodes "between" start and end nodes. Also nodes needed for calculation of non-input nodes of the
|
||||
matched sub-graph produce _constant_ values because they do not depend on input of the network.
|
||||
**This sub-graph match has a limitation that each start node must have only one input**. Therefore, it is not possible
|
||||
to specify, for example, convolution node as input because it has two inputs: data tensor and tensor with weights.
|
||||
|
||||
For example of replacement with points, please refer to the case-study of the
|
||||
[conversion for the SSD models, created with TensorFlow Object Detection API](TensorFlow_SSD_ObjectDetection_API.md).
|
||||
|
||||
@@ -0,0 +1,449 @@
|
||||
# Converting Faster R-CNN models, created with TensorFlow Object Detection API {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_TensorFlow_Faster_RCNN_ObjectDetection_API}
|
||||
|
||||
This is a deprecated page. Please, consider reading [this](../convert_model/tf_specific/Convert_Object_Detection_API_Models.md) page describing new approach to convert Object Detection API models giving closer to TensorFlow inference results.
|
||||
|
||||
## Converting models created with TensorFlow Object Detection API version equal or higher than 1.6.0
|
||||
This chapter describes how to convert selected Faster R-CNN models from the TensorFlow Object Detection API zoo version equal or higher than 1.6.0. The full list of supported models is provided in the table below. Note that currently batch size 1 is supported only. The only Inference Engine plugin supporting these topologies inference is CPU.
|
||||
|
||||
The Faster R-CNN models contain several building blocks similar to building blocks from SSD models so it is highly recommended to read chapter about [enabling TensorFlow Object Detection API SSD models](TensorFlow_SSD_ObjectDetection_API.md) first. Detailed information about Faster R-CNN topologies is provided [here](https://arxiv.org/abs/1506.01497).
|
||||
|
||||
The TensorFlow network consists of a number of big blocks grouped by scope:
|
||||
|
||||
* `Preprocessor` performs scaling/resizing of the image and converts input data to [0, 1] interval. Has two outputs: the first one is modified input image and the second one is a constant tensor with shape (batch_size, 3) and values (resized_image_height, resized_image_width, 3).
|
||||
|
||||
* `FirstStageFeatureExtractor` is a backbone feature extractor.
|
||||
|
||||
* `FirstStageBoxPredictor` calculates boxes and classes predictions.
|
||||
|
||||
* `GridAnchorGenerator` generates anchors coordinates.
|
||||
|
||||
* `ClipToWindow` crops anchors to the resized image size.
|
||||
|
||||
* `Decode` decodes coordinates of boxes using anchors and data from the `FirstStageBoxPredictor`.
|
||||
|
||||
* `BatchMultiClassNonMaxSuppression` performs non maximum suppression.
|
||||
|
||||
* `map` scales coordinates of boxes to [0, 1] interval by dividing coordinates by (resized_image_height, resized_image_width).
|
||||
|
||||
* `map_1` scales coordinates from [0, 1] interval to resized image sizes.
|
||||
|
||||
* `SecondStageFeatureExtractor` is a feature extractor for predicted Regions of interest (ROIs).
|
||||
|
||||
* `SecondStageBoxPredictor` refines box coordinates according to `SecondStageFeatureExtractor`.
|
||||
|
||||
* `SecondStagePostprocessor` is Detection Output layer performing final boxes predictions.
|
||||
|
||||
### Sub-graph replacements
|
||||
There are three sub-graph replacements defined in the `extensions/front/tf/legacy_faster_rcnn_support.json` used to convert these models:
|
||||
|
||||
* the first one replaces the `Preprocessor` block. The implementation of this replacer is in the `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/tf/Preprocessor.py`
|
||||
|
||||
* the second one replaces a number of blocks in the graph including `GridAnchorGenerator`, `ClipToWindow`, `Decode`, `BatchMultiClassNonMaxSuppression`, `Tile`, `Tile_1` and `map` with Proposal and ROIPooling layers and some additional layers to pre-process input data
|
||||
|
||||
* the third one replaces `SecondStagePostprocessor` with a DetectionOutput layer.
|
||||
|
||||
The second replacer is defined using the following configuration that matches sub-graph by points:
|
||||
|
||||
```json
|
||||
{
|
||||
"custom_attributes": {
|
||||
"nms_threshold": 0.7,
|
||||
"feat_stride": 16,
|
||||
"max_proposals": 100,
|
||||
"anchor_base_size": 256,
|
||||
"anchor_scales": [0.25, 0.5, 1.0, 2.0],
|
||||
"anchor_aspect_ratios": [0.5, 1.0, 2.0],
|
||||
"roi_spatial_scale": 0.0625
|
||||
},
|
||||
"id": "TFObjectDetectionAPIFasterRCNNProposalAndROIPooling",
|
||||
"include_inputs_to_sub_graph": true,
|
||||
"include_outputs_to_sub_graph": true,
|
||||
"instances": {
|
||||
"end_points": [
|
||||
"CropAndResize",
|
||||
"map_1/TensorArrayStack/TensorArrayGatherV3",
|
||||
"map_1/while/strided_slice/Enter",
|
||||
"BatchMultiClassNonMaxSuppression/map/TensorArrayStack_4/TensorArrayGatherV3"
|
||||
],
|
||||
"start_points": [
|
||||
"FirstStageBoxPredictor/concat",
|
||||
"FirstStageBoxPredictor/concat_1",
|
||||
"GridAnchorGenerator/Identity",
|
||||
"Shape",
|
||||
"CropAndResize"
|
||||
]
|
||||
},
|
||||
"match_kind": "points"
|
||||
}
|
||||
```
|
||||
|
||||
The `start_points` list contains the following nodes:
|
||||
|
||||
* `FirstStageBoxPredictor/concat` node produces box coordinates predictions.
|
||||
|
||||
* `FirstStageBoxPredictor/concat_1` node produces classes predictions which will be used for the ROIs
|
||||
|
||||
* `GridAnchorGenerator/Identity` node produces anchors coordinates.
|
||||
|
||||
* `Shape` and `CropAndResize` nodes are specified as inputs to correctly isolate the required sub-graph. Refer to the [chapter](Subgraph_Replacement_Model_Optimizer.md) for more information about replacements by points.
|
||||
|
||||
The `end_points` list contains the following nodes:
|
||||
|
||||
* `CropAndResize` is the node that performs ROI pooling operation.
|
||||
|
||||
* `map_1/TensorArrayStack/TensorArrayGatherV3`, `map_1/while/strided_slice/Enter` and `BatchMultiClassNonMaxSuppression/map/TensorArrayStack_4/TensorArrayGatherV3` are specified to correctly isolate the sub-graph.
|
||||
|
||||
The `custom_attributes` dictionary contains attributes where most values are taken from the topology-specific configuration file `samples/configs/faster_rcnn_*.config` of the [TensorFlow Object Detection API repository](https://github.com/tensorflow/models/tree/master/research/object_detection):
|
||||
|
||||
* `nms_threshold` is the value of the `first_stage_nms_iou_threshold` parameter.
|
||||
|
||||
* `feat_stride` is the value of the `height_stride` and `width_stride` parameters. Inference Engine supports case when these two values are equal that is why the replacement configuration file contains just one parameter.
|
||||
|
||||
* `max_proposals` is the value of the `max_total_detections` parameter which is a maximum number of proposal boxes from the Proposal layer and detected boxes.
|
||||
|
||||
* `anchor_base_size` is the base size of the generated anchor. The 256 is the default value for this parameter and it is not specified in the configuration file.
|
||||
|
||||
* `anchor_scales` is the value of the `scales` attribute.
|
||||
|
||||
* `anchor_aspect_ratios` is the value of the `aspect_ratios` attribute.
|
||||
|
||||
* `roi_spatial_scale` is needed for the Inference Engine ROIPooling layer. It is the default value that is not actually used.
|
||||
|
||||
The identifier for this replacer is `TFObjectDetectionAPIFasterRCNNProposalAndROIPooling`. The Python implementation of this replacer is in the file `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/tf/FasterRCNNs.py`.
|
||||
|
||||
The first four functions of the replacer class are the following:
|
||||
|
||||
```python
|
||||
class TFObjectDetectionAPIFasterRCNNProposalAndROIPooling(FrontReplacementFromConfigFileSubGraph):
|
||||
"""
|
||||
This class replaces sub-graph of operations with Proposal and ROIPooling layers and additional layers transforming
|
||||
tensors from layout of TensorFlow to layout required by Inference Engine.
|
||||
Refer to comments inside the function for more information about performed actions.
|
||||
"""
|
||||
replacement_id = 'TFObjectDetectionAPIFasterRCNNProposalAndROIPooling'
|
||||
|
||||
def run_after(self):
|
||||
return [PreprocessorReplacement]
|
||||
|
||||
def run_before(self):
|
||||
return [SecondStagePostprocessorReplacement]
|
||||
|
||||
def output_edges_match(self, graph: nx.DiGraph, match: SubgraphMatch, new_sub_graph: dict):
|
||||
return {match.output_node(0)[0].id: new_sub_graph['roi_pooling_node'].id}
|
||||
|
||||
def nodes_to_remove(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
|
||||
new_list = match.matched_nodes_names().copy()
|
||||
# do not remove nodes that produce box predictions and class predictions
|
||||
new_list.remove(match.single_input_node(0)[0].id)
|
||||
new_list.remove(match.single_input_node(1)[0].id)
|
||||
return new_list
|
||||
```
|
||||
|
||||
The function `run_after` returns a list of Python classes inherited from one of the replacer classes (`FrontReplacementOp`, `FrontReplacementPattern`, `FrontReplacementFromConfigFileSubGraph` etc) that the current sub-graph replacement class must be run after. In this case the replacer must be run after the `Preprocessor` is removed by the `PreprocessorReplacement` replacer. Similarly, the `run_before` function is used to tell Model Optimizer to execute this replacer before `SecondStagePostprocessorReplacement`.
|
||||
|
||||
The `output_edges_match` function describes matching between the output nodes of the sub-graph before replacement and after. In this case the only needed output node of the sub-graph is the `CropAndResize` node which is identified with `match.output_node(0)[0]`. The new output node which is created in the `generate_sub_graph` function is identified with `new_sub_graph['roi_pooling_node']`.
|
||||
|
||||
The `nodes_to_remove` function takes the default list of nodes to be removed which contains all matched nodes and removes from it two input nodes which are identified with `match.single_input_node(0)[0]` and `match.single_input_node(1)[0]`. These nodes will be connected as inputs to new nodes being generated in the `generate_sub_graph` function so they should not be removed.
|
||||
|
||||
The code generating new sub-graph is the following:
|
||||
|
||||
```python
|
||||
def generate_sub_graph(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
|
||||
log.debug('TFObjectDetectionAPIFasterRCNNProposal: matched_nodes = {}'.format(match.matched_nodes_names()))
|
||||
|
||||
config_attrs = match.custom_replacement_desc.custom_attributes
|
||||
nms_threshold = config_attrs['nms_threshold']
|
||||
feat_stride = config_attrs['feat_stride']
|
||||
max_proposals = config_attrs['max_proposals']
|
||||
anchor_base_size = config_attrs['anchor_base_size']
|
||||
roi_spatial_scale = config_attrs['roi_spatial_scale']
|
||||
proposal_ratios = config_attrs['anchor_aspect_ratios']
|
||||
proposal_scales = config_attrs['anchor_scales']
|
||||
anchors_count = len(proposal_ratios) * len(proposal_scales)
|
||||
```
|
||||
|
||||
These lines get parameters defined in the sub-graph replacement configuration file and calculate initial anchors count.
|
||||
|
||||
```python
|
||||
# get the ROIPool size from the CropAndResize which performs the same action
|
||||
if 'CropAndResize' not in graph.nodes():
|
||||
raise Error('Failed to find node with name "CropAndResize" in the topology. Probably this is not Faster'
|
||||
' RCNN topology or it is not supported')
|
||||
roi_pool_size = Node(graph, 'CropAndResize').in_node(3).value[0]
|
||||
```
|
||||
|
||||
The code above gets the ROI Pooling spatial output dimension size as a value from the fourth argument of the node with name `CropAndResize`.
|
||||
|
||||
```python
|
||||
# Convolution/matmul node that produces classes predictions
|
||||
# Permute result of the tensor with classes permissions so it will be in a correct layout for Softmax
|
||||
predictions_node = match.single_input_node(1)[0].in_node(0).in_node(0)
|
||||
permute_predictions_op = Permute(graph, {'order': np.array([0, 2, 3, 1])})
|
||||
permute_predictions_node = permute_predictions_op.create_node([], dict(name=predictions_node.name + '/Permute_'))
|
||||
insert_node_after(predictions_node, permute_predictions_node, 0)
|
||||
|
||||
reshape_classes_op = Reshape(graph, {'dim': np.array([0, -1, 2])})
|
||||
reshape_classes_node = reshape_classes_op.create_node([permute_predictions_node],
|
||||
dict(name='Reshape_FirstStageBoxPredictor_Class_'))
|
||||
update_attrs(reshape_classes_node, 'shape_attrs', 'dim')
|
||||
|
||||
softmax_conf_op = Softmax(graph, {'axis': 1})
|
||||
softmax_conf_node = softmax_conf_op.create_node([reshape_classes_node],
|
||||
dict(name='FirstStageBoxPredictor_SoftMax_Class_'))
|
||||
```
|
||||
|
||||
The output with class predictions from the `FirstStageBoxPredictor` is generated with a convolution operation. The convolution output data layout in TensorFlow is NHWC while Inference Engine uses NCHW layout. Model Optimizer by default converts the weights of TensorFlow convolutions to produce output tensor in NCHW layout required by Inference Engine. The issue arises because the class predictions tensor is passed through the Softmax operation to produce class probabilities. The Inference Engine Softmax is performed over the fastest-changing dimension which is 'W' in Inference Engine. Thus, the softmax operation will be performed over a wrong dimension after conversion of the convolution node producing classes predictions. The solution is to add Permute and Reshape operations to prepare the input data for Softmax. The Reshape operation is required to make the size of the fastest-changing dimension equal to 2, because there are 2 classes being predicted: background and foreground.
|
||||
|
||||
Another issue is that layout of elements in the predicted classes tensor is different between TensorFlow and Inference Engine Proposal layer requirements. In TensorFlow the tensor has the following virtual layout [N, H, W, num_anchors, num_classes] while the Inference Engine Proposal layer requires it in the following virtual layout [N, num_classes, num_anchors, H, W]. Thus, it is necessary to reshape, permute and then reshape again output from the Softmax to the required shape for the Proposal layer:
|
||||
|
||||
```python
|
||||
reshape_softmax_op = Reshape(graph, {'dim': np.array([1, anchors_count, 2, -1])})
|
||||
reshape_softmax_node = reshape_softmax_op.create_node([softmax_conf_node], dict(name='Reshape_Softmax_Class_'))
|
||||
update_attrs(reshape_softmax_node, 'shape_attrs', 'dim')
|
||||
|
||||
permute_reshape_softmax_op = Permute(graph, {'order': np.array([0, 1, 3, 2])})
|
||||
permute_reshape_softmax_node = permute_reshape_softmax_op.create_node([reshape_softmax_node],
|
||||
dict(name='Permute_'))
|
||||
|
||||
# implement custom reshape infer function because we need to know the input convolution node output dimension
|
||||
# sizes but we can know it only after partial infer
|
||||
reshape_permute_op = Reshape(graph, {'dim': np.ones([4]), 'anchors_count': anchors_count,
|
||||
'conv_node': predictions_node})
|
||||
reshape_permute_op.attrs['old_infer'] = reshape_permute_op.attrs['infer']
|
||||
reshape_permute_op.attrs['infer'] = __class__.classes_probabilities_reshape_shape_infer
|
||||
reshape_permute_node = reshape_permute_op.create_node([permute_reshape_softmax_node],
|
||||
dict(name='Reshape_Permute_Class_'))
|
||||
update_attrs(reshape_permute_node, 'shape_attrs', 'dim')
|
||||
```
|
||||
|
||||
The Proposal layer has 3 inputs: classes probabilities, boxes predictions and an input shape of the image. The first two tensors are ready so it is necessary to create the Const operation that produces the desired third input tensor.
|
||||
|
||||
```python
|
||||
# create constant input with the image height, width and scale H and scale W (if present) required for Proposal
|
||||
const_value = np.array([[input_height, input_width, 1]], dtype=np.float32)
|
||||
const_op = Const(graph, dict(value=const_value, shape=const_value.shape))
|
||||
const_node = const_op.create_node([], dict(name='Proposal_const_image_size_'))
|
||||
```
|
||||
|
||||
Now add the Proposal layer:
|
||||
|
||||
```python
|
||||
|
||||
proposal_op = ProposalOp(graph, dict(min_size=10, framework='tensorflow', box_coordinate_scale=10,
|
||||
box_size_scale=5, post_nms_topn=max_proposals, feat_stride=feat_stride,
|
||||
ratio=proposal_ratios, scale=proposal_scales, base_size=anchor_base_size,
|
||||
pre_nms_topn=2**31 - 1,
|
||||
nms_thresh=nms_threshold))
|
||||
proposal_node = proposal_op.create_node([reshape_permute_node,
|
||||
match.single_input_node(0)[0].in_node(0).in_node(0),
|
||||
const_node],
|
||||
dict(name=proposal_op.attrs['type'] + '_'))
|
||||
```
|
||||
|
||||
The box coordinates in the TensorFlow are in the following layout "YXYX" while Inference Engine uses "XYXY" layout so it is necessary to swap coordinates produced by Proposal layer. It is implemented with help of a convolution node with a special filter of a size [5, 5]:
|
||||
|
||||
```python
|
||||
proposal_reshape_4d_op = Reshape(graph, {'dim': np.array([max_proposals, 1, 1, 5])})
|
||||
proposal_reshape_4d_node = proposal_reshape_4d_op.create_node([proposal_node], dict(name="reshape_4d_"))
|
||||
update_attrs(proposal_reshape_4d_node, 'shape_attrs', 'dim')
|
||||
|
||||
# create convolution node to swap X and Y coordinates in the proposals
|
||||
conv_filter_const_data = np.array(np.array([[1, 0, 0, 0, 0],
|
||||
[0, 0, 1, 0, 0],
|
||||
[0, 1, 0, 0, 0],
|
||||
[0, 0, 0, 0, 1],
|
||||
[0, 0, 0, 1, 0]],
|
||||
dtype=np.float32).reshape([1, 1, 5, 5]), dtype=np.float32)
|
||||
conv_filter_const_op = Const(graph, dict(value=conv_filter_const_data, spatial_dims=np.array([2, 3])))
|
||||
conv_filter_const_node = conv_filter_const_op.create_node([], dict(name="conv_weights"))
|
||||
|
||||
conv_op = Op(graph, {
|
||||
'op': 'Conv2D',
|
||||
'bias_addable': False,
|
||||
'spatial_dims': np.array([1, 2]),
|
||||
'channel_dims': np.array([3]),
|
||||
'batch_dims': np.array([0]),
|
||||
'pad': None,
|
||||
'pad_spatial_shape': None,
|
||||
'input_feature_channel': 2,
|
||||
'output_feature_channel': 2,
|
||||
'output_shape': [max_proposals, 1, 1, 5],
|
||||
'dilation': np.array([1, 1, 1, 1], dtype=np.int64),
|
||||
'stride': np.array([1, 1, 1, 1]),
|
||||
'type': 'Convolution',
|
||||
'group': None,
|
||||
'layout': 'NHWC',
|
||||
'infer': __class__.fake_conv_shape_infer})
|
||||
predictions_node = conv_op.create_node([proposal_reshape_4d_node, conv_filter_const_node], dict(name="conv_"))
|
||||
update_ie_fields(graph.node[predictions_node.id])
|
||||
|
||||
proposal_reshape_2d_op = Reshape(graph, {'dim': np.array([max_proposals, 5])})
|
||||
proposal_reshape_2d_node = proposal_reshape_2d_op.create_node([predictions_node], dict(name="reshape_2d_"))
|
||||
# set specific name for this Reshape operation so we can use it in the DetectionOutput replacer
|
||||
proposal_reshape_2d_node['name'] = 'swapped_proposals'
|
||||
```
|
||||
|
||||
The ROIPooling layer in TensorFlow is implemented with operation called `CropAndResize` with bi-linear filtration. Inference Engine implementation of the ROIPooling layer with bi-linear filtration requires input boxes coordinates be scaled to [0, 1] interval. Adding elementwise multiplication of box coordinates solves this issue:
|
||||
|
||||
```python
|
||||
# the TF implementation of Proposal with bi-linear filtration need proposals scaled by image size
|
||||
proposal_scale_const = np.array([1.0, 1 / input_height, 1 / input_width, 1 / input_height, 1 / input_width],
|
||||
dtype=np.float32)
|
||||
proposal_scale_const_op = Const(graph, dict(value=proposal_scale_const, shape=proposal_scale_const.shape))
|
||||
proposal_scale_const_node = proposal_scale_const_op.create_node([], dict(name='Proposal_scale_const_'))
|
||||
|
||||
scale_proposals_op = Eltwise(graph, {'operation': 'mul'})
|
||||
scale_proposals_node = scale_proposals_op.create_node([proposal_reshape_2d_node, proposal_scale_const_node],
|
||||
dict(name='scale_proposals_'))
|
||||
```
|
||||
|
||||
The last step is to create the ROIPooling node with 2 inputs: the identified feature maps from the `FirstStageFeatureExtractor` and the scaled output of the Proposal layer:
|
||||
|
||||
```python
|
||||
feature_extractor_output_nodes = scope_output_nodes(graph, 'FirstStageFeatureExtractor')
|
||||
if len(feature_extractor_output_nodes) != 1:
|
||||
raise Error("Failed to determine FirstStageFeatureExtractor output node to connect it to the ROIPooling."
|
||||
"Found the following nodes: {}".format([node.name for node in feature_extractor_output_nodes]))
|
||||
|
||||
roi_pooling_op = ROIPooling(graph, dict(method="bilinear", framework="tensorflow",
|
||||
pooled_h=roi_pool_size, pooled_w=roi_pool_size,
|
||||
spatial_scale=roi_spatial_scale))
|
||||
roi_pooling_node = roi_pooling_op.create_node([feature_extractor_output_nodes[0], scale_proposals_node],
|
||||
dict(name='ROI_Pooling_'))
|
||||
|
||||
return {'roi_pooling_node': roi_pooling_node}
|
||||
```
|
||||
|
||||
There are two additional methods implemented in the replacer class:
|
||||
|
||||
* The `fake_conv_shape_infer` is a silly infer function for the convolution that permutes X and Y coordinates of the Proposal output which avoids setting a lot of internal attributes required for proper shape inference.
|
||||
|
||||
* The `classes_probabilities_reshape_shape_infer` function is used to update the output dimension of the reshape operation. The output spatial dimensions depend on the convolution output spatial dimensions thus they are not known until the shape inference pass which is performed after this sub-graph replacement class. So this custom infer function is called instead of default Reshape shape inference function, updates the required attribute "dim" of the node with the convolution output spatial dimensions which are known at the time of calling this inference function and then calls the default Reshape inference function.
|
||||
|
||||
```python
|
||||
@staticmethod
|
||||
def fake_conv_shape_infer(node: Node):
|
||||
node.out_node(0).shape = node.in_node(0).shape
|
||||
# call functions to update internal attributes required for correct IR generation
|
||||
mark_input_bins(node)
|
||||
assign_dims_to_weights(node.in_node(1), [0, 1], node.input_feature_channel, node.output_feature_channel, 4)
|
||||
|
||||
@staticmethod
|
||||
def classes_probabilities_reshape_shape_infer(node: Node):
|
||||
# now we can determine the reshape dimensions from Convolution node
|
||||
conv_node = node.conv_node
|
||||
conv_output_shape = conv_node.out_node().shape
|
||||
|
||||
# update desired shape of the Reshape node
|
||||
node.dim = np.array([0, conv_output_shape[1], conv_output_shape[2], node.anchors_count * 2])
|
||||
node.old_infer(node)
|
||||
```
|
||||
|
||||
The third replacer defined in the sub-graph replacement configuration file replaces the `SecondStagePostprocessor` block and is defined using scope:
|
||||
|
||||
```json
|
||||
{
|
||||
"custom_attributes": {
|
||||
"code_type": "caffe.PriorBoxParameter.CENTER_SIZE",
|
||||
"confidence_threshold": 0.01,
|
||||
"keep_top_k": 300,
|
||||
"nms_threshold": 0.6,
|
||||
"pad_mode": "caffe.ResizeParameter.CONSTANT",
|
||||
"resize_mode": "caffe.ResizeParameter.WARP",
|
||||
"max_detections_per_class": 100,
|
||||
"num_classes": 90
|
||||
},
|
||||
"id": "SecondStagePostprocessorReplacement",
|
||||
"inputs": [
|
||||
[
|
||||
{
|
||||
"node": "Reshape$",
|
||||
"port": 0
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"node": "Reshape_1$",
|
||||
"port": 0
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"node": "ExpandDims$",
|
||||
"port": 0
|
||||
}
|
||||
]
|
||||
],
|
||||
"instances": [
|
||||
".*SecondStagePostprocessor/"
|
||||
],
|
||||
"match_kind": "scope",
|
||||
"outputs": [
|
||||
{
|
||||
"node": "BatchMultiClassNonMaxSuppression/map/TensorArrayStack/TensorArrayGatherV3$",
|
||||
"port": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The replacement code is similar to the `SecondStagePostprocessor` replacement for the SSDs topologies. There are two major differences:
|
||||
|
||||
* The tensor with bounding boxes doesn't contain locations for class 0 (background class) but Inference Engine Detection Output layer requires it. The Const node with some dummy values is created and concatenated with the tensor.
|
||||
|
||||
* The priors tensor is not constant like in SSDs so the bounding boxes tensor must be scaled with variances [0.1, 0.1, 0.2, 0.2].
|
||||
|
||||
The differences described above are resolved with the following code:
|
||||
|
||||
```python
|
||||
# TF produces locations tensor without boxes for background.
|
||||
# Inference Engine DetectionOutput layer requires background boxes so we generate them with some values
|
||||
# and concatenate with locations tensor
|
||||
fake_background_locs_blob = np.tile([[[1, 1, 2, 2]]], [max_detections_per_class, 1, 1])
|
||||
fake_background_locs_const_op = Const(graph, dict(value=fake_background_locs_blob,
|
||||
shape=fake_background_locs_blob.shape))
|
||||
fake_background_locs_const_node = fake_background_locs_const_op.create_node([])
|
||||
|
||||
reshape_loc_op = Reshape(graph, {'dim': np.array([max_detections_per_class, num_classes, 4])})
|
||||
reshape_loc_node = reshape_loc_op.create_node([match.single_input_node(0)[0].in_node(0)],
|
||||
dict(name='Reshape_loc_'))
|
||||
|
||||
concat_loc_op = Concat(graph, {'axis': 1})
|
||||
concat_loc_node = concat_loc_op.create_node([fake_background_locs_const_node, reshape_loc_node],
|
||||
dict(name='Concat_fake_loc_'))
|
||||
|
||||
# blob with variances
|
||||
variances_blob = np.array([0.1, 0.1, 0.2, 0.2])
|
||||
variances_const_op = Const(graph, dict(value=variances_blob, shape=variances_blob.shape))
|
||||
variances_const_node = variances_const_op.create_node([])
|
||||
|
||||
# reshape locations tensor to 2D so it could be passed to Eltwise which will be converted to ScaleShift
|
||||
reshape_loc_2d_op = Reshape(graph, {'dim': np.array([-1, 4])})
|
||||
reshape_loc_2d_node = reshape_loc_2d_op.create_node([concat_loc_node], dict(name='reshape_locs_2d_'))
|
||||
|
||||
# element-wise multiply locations with variances
|
||||
eltwise_locs_op = Eltwise(graph, {'operation': 'mul'})
|
||||
eltwise_locs_node = eltwise_locs_op.create_node([reshape_loc_2d_node, variances_const_node],
|
||||
dict(name='scale_locs_'))
|
||||
```
|
||||
|
||||
### Example of Model Optimizer Command-Line for TensorFlow's Faster R-CNNs
|
||||
The final command line to convert Faster R-CNNs from the TensorFlow* Object Detection Zoo is the following:
|
||||
|
||||
```sh
|
||||
./mo.py --input_model=<path_to_frozen.pb> --output=detection_boxes,detection_scores,num_detections --tensorflow_use_custom_operations_config extensions/front/tf/legacy_faster_rcnn_support.json
|
||||
```
|
||||
|
||||
Note that there are minor changes that should be made to the sub-graph replacement configuration file `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/tf/legacy_faster_rcnn_support.json` before converting a particular Faster R-CNN topology. Refer to the table below.
|
||||
|
||||
### Sub-Graph Replacement Configuration File Parameters to Convert Different Faster R-CNN Models
|
||||
|Model Name | Configuration File Changes|
|
||||
|:----|:----:|
|
||||
| faster_rcnn_inception_v2_coco | None
|
||||
| faster_rcnn_resnet50_coco | None
|
||||
| faster_rcnn_resnet50_lowproposals_coco | None
|
||||
| faster_rcnn_resnet101_coco | None
|
||||
| faster_rcnn_resnet101_lowproposals_coco | None
|
||||
| faster_rcnn_inception_resnet_v2_atrous_coco | "feat_stride: 8"
|
||||
| faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco| "feat_stride: 8"
|
||||
|
||||
@@ -0,0 +1,339 @@
|
||||
# (Deprecated) Case Study: Converting SSD Models Created with TensorFlow* Object Detection API {#openvino_docs_MO_DG_prepare_model_customize_model_optimizer_TensorFlow_SSD_ObjectDetection_API}
|
||||
|
||||
This is a deprecated page. Please, consider reading [this](../convert_model/tf_specific/Convert_Object_Detection_API_Models.md) page describing new approach to convert Object Detection API models giving closer to TensorFlow inference results.
|
||||
|
||||
## Converting Models Created with TensorFlow Object Detection API Version prior 1.6.0
|
||||
|
||||
As explained in the [Sub-graph Replacement in Model Optimizer](Subgraph_Replacement_Model_Optimizer.md) section, there are multiple
|
||||
ways to set up the sub-graph matching. In this example we are focusing on defining the sub-graph via a set of
|
||||
"start" and "end" nodes.
|
||||
The result of matching is two buckets of nodes:
|
||||
* Nodes "between" start and end nodes.
|
||||
* Nodes connected to the first list, but just on the constant path (e.g. these nodes are not connected to the inputs of the entire graph).
|
||||
|
||||
Let's look closer to the SSD models from the TensorFlow* detection model
|
||||
<a href="https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md">zoo</a>:
|
||||
[SSD MobileNet](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz) and
|
||||
[SSD InceptionV2](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2017_11_17.tar.gz).
|
||||
|
||||
* Nodes "between" start and end nodes
|
||||
* Nodes connected to the first list, but just on the constant path (for example, these nodes are not connected to the inputs of the entire graph). Let's look closer to the SSD models from the TensorFlow\* detection model <a href="https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md">zoo</a> : [SSD MobileNet](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2017_11_17.tar.gz) and [SSD InceptionV2](http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2017_11_17.tar.gz).
|
||||
|
||||
A distinct layer of any SSD topology is the `DetectionOutput` layer. This layer is implemented with dozens of primitive operations in TensorFlow, while in Inference Engine, it is one [layer](../../../ops/opset.md). Thus, to convert an SSD model from TensorFlow, the Model Optimizer should replace the entire sub-graph of operations that implement the `DetectionOutput` layer with a single well-known `DetectionOutput` node.
|
||||
|
||||
The Inference Engine `DetectionOutput` layer consumes three tensors in the following order:
|
||||
|
||||
1. Tensor with locations of bounding boxes
|
||||
2. Tensor with confidences for each bounding box
|
||||
3. Tensor with prior boxes (anchors in TensorFlow terminology)
|
||||
|
||||
`DetectionOutput` layer produces one tensor with seven numbers for each actual detection. There are more output tensors in the TensorFlow Object Detection API, but the values in them are consistent with the Inference Engine ones.
|
||||
|
||||
The difference with [other examples](Subgraph_Replacement_Model_Optimizer.md) is that here the `DetectionOutput` sub-graph is replaced with a new sub-graph (not a single layer).
|
||||
|
||||
Look at sub-graph replacement configuration file `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/tf/legacy_ssd_support.json` that is used to enable two models listed above:
|
||||
```json
|
||||
[
|
||||
{
|
||||
"custom_attributes": {
|
||||
"code_type": "caffe.PriorBoxParameter.CENTER_SIZE",
|
||||
"confidence_threshold": 0.01,
|
||||
"keep_top_k": 200,
|
||||
"nms_threshold": 0.45,
|
||||
"pad_mode": "caffe.ResizeParameter.CONSTANT",
|
||||
"resize_mode": "caffe.ResizeParameter.WARP"
|
||||
},
|
||||
"id": "TFObjectDetectionAPIDetectionOutput",
|
||||
"include_inputs_to_sub_graph": true,
|
||||
"include_outputs_to_sub_graph": true,
|
||||
"instances": {
|
||||
"end_points": [
|
||||
"detection_boxes",
|
||||
"detection_scores",
|
||||
"num_detections"
|
||||
],
|
||||
"start_points": [
|
||||
"Postprocessor/Shape",
|
||||
"Postprocessor/Slice",
|
||||
"Postprocessor/ExpandDims",
|
||||
"Postprocessor/Reshape_1"
|
||||
]
|
||||
},
|
||||
"match_kind": "points"
|
||||
},
|
||||
{
|
||||
"custom_attributes": {
|
||||
},
|
||||
"id": "PreprocessorReplacement",
|
||||
"inputs": [
|
||||
[
|
||||
{
|
||||
"node": "map/Shape$",
|
||||
"port": 0
|
||||
},
|
||||
{
|
||||
"node": "map/TensorArrayUnstack/Shape$",
|
||||
"port": 0
|
||||
},
|
||||
{
|
||||
"node": "map/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3$",
|
||||
"port": 2
|
||||
}
|
||||
]
|
||||
],
|
||||
"instances": [
|
||||
".*Preprocessor/"
|
||||
],
|
||||
"match_kind": "scope",
|
||||
"outputs": [
|
||||
{
|
||||
"node": "sub$",
|
||||
"port": 0
|
||||
},
|
||||
{
|
||||
"node": "map/TensorArrayStack_1/TensorArrayGatherV3$",
|
||||
"port": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
**Key lines**:
|
||||
|
||||
* Lines 3-10 define static attributes that will be saved to the Intermediate Representation `.xml` file for `DetectionOutput` layer.
|
||||
|
||||
* Lines 12 and 13 define values for attributes that should be always set to "true" for this release of the Model Optimizer. These two attributes are specific for sub-graph match by points only.
|
||||
|
||||
* Lines 14-26 define one instance of the sub-graph to be matched. It is an important difference between sub-graph matching by scope and points. Several instances could be specified for matching by scope, but matching with points allows specifying just one instance. So the full node names (not regular expressions like in case of match with scope) are specified in `instances` dictionary.
|
||||
|
||||
The second sub-graph replacer with identifier `PreprocessorReplacement` is used to remove the `Preprocessor` block from the graph. The replacer removes all nodes from this scope except nodes performing mean value subtraction and scaling (if applicable). Implementation of the replacer is in the `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/tf/Preprocessor.py` file.
|
||||
|
||||
Now let's analyze the structure of the topologies generated with the Object Detection API. There are several blocks in the graph performing particular task:
|
||||
|
||||
* `Preprocessor` block resizes, scales and subtracts mean values from the input image.
|
||||
|
||||
* `FeatureExtractor` block is a [MobileNet](https://arxiv.org/abs/1704.04861) or other backbone to extract features.
|
||||
|
||||
* `MultipleGridAnchorGenerator` block creates initial bounding boxes locations (anchors).
|
||||
|
||||
* `Postprocessor` block acts as a `DetectionOutput` layer. So we need to replace `Postprocessor` block with `DetectionOutput` layer. It is necessary to add all input nodes of the `Postprocessor` scope to the list `start_points`. Consider inputs of each of these nodes:
|
||||
|
||||
* `Postprocessor/Shape` consumes tensor with locations.
|
||||
* `Postprocessor/Slice` consumes tensor with confidences.
|
||||
* `Postprocessor/ExpandDims` consumes tensor with prior boxes.
|
||||
* `Postprocessor/Reshape_1` consumes tensor with locations similarly to the `Postprocessor/Shape` node. Despite the fact that the last node `Postprocessor/Reshape_1` gets the same tensor as node `Postprocessor/Shape`, it must be explicitly put to the list.
|
||||
|
||||
Object Detection API `Postprocessor` block generates output nodes: `detection_boxes`, `detection_scores`, `num_detections`, `detection_classes`.
|
||||
|
||||
Now consider the implementation of the sub-graph replacer, available in the `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/tf/SSDs.py`. The file is rather big, so only some code snippets are used:
|
||||
```python
|
||||
class PostprocessorReplacement(FrontReplacementFromConfigFileSubGraph):
|
||||
replacement_id = 'TFObjectDetectionAPIDetectionOutput'
|
||||
```
|
||||
|
||||
These lines define the new `PostprocessorReplacement` class inherited from `FrontReplacementFromConfigFileSubGraph`. `FrontReplacementFromConfigFileSubGraph` is designed to replace sub-graph of operations described in the configuration file. There are methods to override for implementing custom replacement logic that we need:
|
||||
|
||||
* `generate_sub_graph` performs new sub-graph generation and returns a dictionary where the key is an alias name for the node and the value is a Node object. The dictionary has the same format as parameter `match` in the `replace_sub_graph` method in the example with <a href="Subgraph_Replacement_Model_Optimizer.html#replace-using-isomorphism-pattern">networkx sub-graph isomorphism pattern</a>. This dictionary is passed as an argument to the next three methods, so it should contain entries for the nodes that the functions need.
|
||||
|
||||
* `input_edges_match` specifies mapping between input edges to sub-graph before replacement and after replacement. The key of the dictionary is a tuple specifying input tensor of the sub-graph before replacement: sub-graph input node name and input port number for this node. The value for this key is also a tuple specifying the node where this tensor should be attached during replacement: the node name (or alias name of the node) and the input port for this node. If the port number is zero, the parameter could be omitted so the key or value is just a node name (alias). Default implementation of the method returns an empty dictionary, so Model Optimizer does not create new edges.
|
||||
|
||||
* `output_edges_match` returns mapping between old output edges of the matched nodes and new sub-graph node and output edge index. The format is similar to the dictionary returned in the `input_edges_match` method. The only difference is that instead of specifying input port numbers for the nodes it is necessary to specify output port number. Of course, this mapping is needed for the output nodes only. Default implementation of the method returns an empty dictionary, so the Model Optimizer does not create new edges.
|
||||
|
||||
* `nodes_to_remove` specifies list of nodes that Model Optimizer should remove after sub-graph replacement. Default implementation of the method removes all sub-graph nodes.
|
||||
|
||||
Let's review the replacer code, considering the details of the `DetectionOutput` layer implementation in the Inference Engine. There are several constraints on the input tensors of the `DetectionOutput` layer:
|
||||
|
||||
* The tensor with locations must be of shape `[#‍batch, #‍prior_boxes * 4]` or `[#‍batch, #‍prior_boxes * 5]` depending on shared locations between different batches or not.
|
||||
* The tensor with confidences must be of shape `[#‍batch, #‍prior_boxes * #‍classes]` and confidences values are in range [0, 1], that is passed through `softmax` layer.
|
||||
* The tensor with prior boxes must be of shape `[#‍batch, 2, #‍prior_boxes * 4]`. Inference Engine expects that it contains variance values which TensorFlow Object Detection API does not add.
|
||||
|
||||
To enable these models, add `Reshape` operations for locations and confidences tensors and update the values for the prior boxes to include the variance constants (they are not there in TensorFlow Object Detection API).
|
||||
|
||||
Look at the `generate_sub_graph` method:
|
||||
```python
|
||||
def generate_sub_graph(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
|
||||
log.debug('PostprocessorReplacement.generate_sub_graph')
|
||||
log.debug('matched_nodes = {}'.format(match.matched_nodes_names()))
|
||||
# softmax to be applied to the confidence
|
||||
softmax_conf_op = Softmax(graph, {'axis': 2, 'nchw_layout': True})
|
||||
softmax_conf_node = softmax_conf_op.add_node(dict(name='DetectionOutput_SoftMax_conf_'))
|
||||
# Inference Engine DetectionOutput layer consumes flattened tensors
|
||||
# reshape operation to flatten locations tensor
|
||||
reshape_loc_op = Reshape(graph, {'dim': np.array([0, -1])})
|
||||
reshape_loc_node = reshape_loc_op.add_node(dict(name='DetectionOutput_Reshape_loc_'))
|
||||
# Inference Engine DetectionOutput layer consumes flattened tensors
|
||||
# reshape operation to flatten confidence tensor
|
||||
reshape_conf_op = Reshape(graph, {'dim': np.array([0, -1])})
|
||||
reshape_conf_node = reshape_conf_op.add_node(dict(name='DetectionOutput_Reshape_conf_'))
|
||||
# create Node object from Op class
|
||||
detection_output_op = DetectionOutput(graph, match.custom_replacement_desc.custom_attributes)
|
||||
detection_output_op.attrs['old_infer'] = detection_output_op.attrs['infer']
|
||||
detection_output_op.attrs['infer'] = __class__.do_infer
|
||||
detection_output_node = detection_output_op.add_node(dict(name=detection_output_op.attrs['type'] + '_'))
|
||||
# create internal edges of the sub-graph. In this case we add edges to connect input port 0 and 1 of the
|
||||
# detection output with output of reshape of locations and reshape of confidence
|
||||
create_edge(softmax_conf_node, reshape_conf_node, 0, 0)
|
||||
create_edge(reshape_loc_node, detection_output_node, 0, 0)
|
||||
create_edge(reshape_conf_node, detection_output_node, 0, 1)
|
||||
return {'detection_output_node': detection_output_node, 'reshape_conf_node': softmax_conf_node,
|
||||
'reshape_loc_node': reshape_loc_node}
|
||||
```
|
||||
The method has two inputs: the graph to operate on and the instance of `SubgraphMatch` object, which describes matched sub-graph. The latter class has several useful methods to get particular input/output node of the sub-graph by input/output index or by node name pattern. Examples of these methods usage are given below.
|
||||
|
||||
**Key lines**:
|
||||
|
||||
* Lines 6 and 7 create new instance of operation of type `Softmax` and graph Node object corresponding to that operation.
|
||||
|
||||
* Lines 11-12 and 16-17 create new instance of operation of type `Reshape` to reshape locations and confidences tensors correspondingly.
|
||||
|
||||
* Lines 20-23 create new instance of operation `DetectionOutput` and graph Node object corresponding to that operation.
|
||||
|
||||
* Lines 27-29 connect `softmax` node with `reshape` node and connect two reshaped locations and confidences tensors with `DetectionOutput` node.
|
||||
|
||||
* Lines 30-31 define dictionary with aliases for detection output node, reshape locations and confidences nodes. These aliases are used in the `input_edges_match` and `output_edges_match` methods.
|
||||
|
||||
The `input_edges_match` method is the following:
|
||||
```python
|
||||
def input_edges_match(self, graph: nx.DiGraph, match: SubgraphMatch, new_sub_graph: dict):
|
||||
locs_consumer_node, locs_consumer_node_port = match.input_nodes(0)[0]
|
||||
conf_consumer_node, conf_consumer_node_port = match.input_nodes(1)[0]
|
||||
priors_consumer_node, priors_consumer_node_port = match.input_nodes(2)[0]
|
||||
# create matching nodes for locations and confidence tensors using simple scheme "old_node_name: new_node_name"
|
||||
# which in fact means "(old_node_name, 0): (new_node_name, 0)", while first '0' means old_port and the second
|
||||
# zero defines 'new_port'.
|
||||
return {locs_consumer_node.id: new_sub_graph['reshape_loc_node'].id,
|
||||
conf_consumer_node.id: new_sub_graph['reshape_conf_node'].id,
|
||||
priors_consumer_node.id: (new_sub_graph['detection_output_node'].id, 2),
|
||||
}
|
||||
```
|
||||
The method has three parameters: input `graph`, `match` object describing matched sub-graph and `new_sub_graph` dictionary with alias names returned from the `generate_sub_graph` method.
|
||||
|
||||
**Key lines**:
|
||||
|
||||
* Lines 2-4 initialize Node objects and input ports for the nodes where the input tensors for the sub-graph are consumed. The method `match.input_nodes(ind)` returns list of tuples where the first element is a Node object and the second is the input port for this node which consumes the ind-th input tensor of the sub-graph. `input_points` list in the configuration file defines the order of input tensors to the sub-graph. For example, the `locs_consumer_node` object of type Node is a node that consumes tensor with locations in the port with number `locs_consumer_node_port`.
|
||||
|
||||
* Lines 8-11 define dictionary with the mapping of tensors as described above. Note that the attribute `id` of the Node object contains the name of the node in the graph.
|
||||
|
||||
The `output_edges_match` method is the following:
|
||||
```python
|
||||
def output_edges_match(self, graph: nx.DiGraph, match: SubgraphMatch, new_sub_graph: dict):
|
||||
# the DetectionOutput in IE produces single tensor, but in TF it produces two tensors, so we need to create only
|
||||
# one output edge match
|
||||
return {match.output_node(0)[0].id: new_sub_graph['detection_output_node'].id}
|
||||
```
|
||||
|
||||
The method has the same three parameters as the `input_edges_match` method. The returned dictionary contains a mapping for just one tensor, initially produced by the first output node of the sub-graph (which is `detection_boxes` according to the configuration file), to the single output tensor of the created `DetectionOutput` node. In fact, it is possible to use any output node of the initial sub-graph in the mapping, because the sub-graph output nodes are the output nodes of the whole graph (their output is not consumed by any other nodes).
|
||||
|
||||
Now, the Model Optimizer knows how to replace the sub-graph. The last step to enable the model is to cut-off some parts of the graph not needed during inference.
|
||||
|
||||
It is necessary to remove the `Preprocessor` block where the image is resized. Inference Engine does not support dynamic input shapes, so the Model Optimizer must freeze the input image size, and thus, resizing of the image is not necessary. This is achieved by the replacer `<INSTALL_DIR>/deployment_tools/model_optimizer/extensions/front/tf/Preprocessor.py`, which is executed automatically.
|
||||
|
||||
There are several `Switch` operations in the `Postprocessor` block without output edges. For example:
|
||||
```sh
|
||||
Postprocessor/BatchMultiClassNonMaxSuppression/map/while/PadOrClipBoxList/cond/cond/switch_t
|
||||
```
|
||||
```sh
|
||||
Postprocessor/BatchMultiClassNonMaxSuppression/map/while/PadOrClipBoxList/cond/cond/switch_f
|
||||
```
|
||||
```sh
|
||||
Postprocessor/BatchMultiClassNonMaxSuppression/map/while/PadOrClipBoxList/cond_1/cond/switch_t
|
||||
```
|
||||
```sh
|
||||
Postprocessor/BatchMultiClassNonMaxSuppression/map/while/PadOrClipBoxList/cond_1/cond/switch_f
|
||||
```
|
||||
|
||||
Model Optimizer marks these nodes as output nodes of the topology. Because of that, some parts of the `Postprocessor` block are not removed during sub-graph replacement. To fix this issue, it is necessary to specify the output nodes of the graph manually using the `--output` command line parameter.
|
||||
|
||||
### Example Model Optimizer Command-Line for TensorFlow\* SSD
|
||||
|
||||
The final command line to convert SSDs from the TensorFlow Object Detection API Zoo is:
|
||||
```shell
|
||||
./mo_tf.py --input_model=<path_to_frozen.pb> --tensorflow_use_custom_operations_config extensions/front/tf/legacy_ssd_support.json --output="detection_boxes,detection_scores,num_detections"
|
||||
```
|
||||
|
||||
## Converting MobileNet V2 model created with TensorFlow Object Detection API <a name="convert_mobilenet_v2"></a>
|
||||
The [MobileNet V2 model](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz) differs from the previous version, so converting the model requires a new sub-graph replacement configuration file and new command line parameters. The major differences are:
|
||||
|
||||
* The `Preprocessor` block has two outputs: the pre-processed image and the pre-processed image size.
|
||||
* The `Postprocessor` block has one more input (in comparison with models created with TensorFlow Object Detection API
|
||||
version 1.6 or lower): the pre-processed image size.
|
||||
* Some node names have been changed in the `Postprocessor` block.
|
||||
|
||||
The updated sub-graph replacement configuration file `extensions/front/tf/ssd_v2_support.json` reflecting these changes
|
||||
is the following:
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"custom_attributes": {
|
||||
"code_type": "caffe.PriorBoxParameter.CENTER_SIZE",
|
||||
"confidence_threshold": 0.01,
|
||||
"keep_top_k": 200,
|
||||
"nms_threshold": 0.6,
|
||||
"pad_mode": "caffe.ResizeParameter.CONSTANT",
|
||||
"resize_mode": "caffe.ResizeParameter.WARP"
|
||||
},
|
||||
"id": "TFObjectDetectionAPIDetectionOutput",
|
||||
"include_inputs_to_sub_graph": true,
|
||||
"include_outputs_to_sub_graph": true,
|
||||
"instances": {
|
||||
"end_points": [
|
||||
"detection_boxes",
|
||||
"detection_scores",
|
||||
"num_detections"
|
||||
],
|
||||
"start_points": [
|
||||
"Postprocessor/Shape",
|
||||
"Postprocessor/scale_logits",
|
||||
"Postprocessor/ExpandDims",
|
||||
"Postprocessor/Reshape_1",
|
||||
"Postprocessor/ToFloat"
|
||||
]
|
||||
},
|
||||
"match_kind": "points"
|
||||
},
|
||||
{
|
||||
"custom_attributes": {
|
||||
},
|
||||
"id": "PreprocessorReplacement",
|
||||
"inputs": [
|
||||
[
|
||||
{
|
||||
"node": "map/Shape$",
|
||||
"port": 0
|
||||
},
|
||||
{
|
||||
"node": "map/TensorArrayUnstack/Shape$",
|
||||
"port": 0
|
||||
},
|
||||
{
|
||||
"node": "map/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3$",
|
||||
"port": 2
|
||||
}
|
||||
]
|
||||
],
|
||||
"instances": [
|
||||
".*Preprocessor/"
|
||||
],
|
||||
"match_kind": "scope",
|
||||
"outputs": [
|
||||
{
|
||||
"node": "sub$",
|
||||
"port": 0
|
||||
},
|
||||
{
|
||||
"node": "map/TensorArrayStack_1/TensorArrayGatherV3$",
|
||||
"port": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### Example of Model Optimizer Command-Line for TensorFlow SSD MobileNet V2
|
||||
The final command line to convert MobileNet SSD V2 from the TensorFlow Object Detection Zoo is the following:
|
||||
|
||||
```sh
|
||||
./mo_tf.py --input_model=<path_to_frozen.pb> --tensorflow_use_custom_operations_config extensions/front/tf/ssd_v2_support.json --output="detection_boxes,detection_scores,num_detections"
|
||||
```
|
||||
@@ -19,181 +19,133 @@ Measuring inference performance involves many variables and is extremely use-cas
|
||||
<script src="https://cdn.jsdelivr.net/npm/chartjs-plugin-datalabels"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/chartjs-plugin-annotation/0.5.7/chartjs-plugin-annotation.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chartjs-plugin-barchart-background@1.3.0/build/Plugin.Barchart.Background.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chartjs-plugin-deferred@1"></script>
|
||||
<!-- download this file and place on your server (or include the styles inline) -->
|
||||
<link rel="stylesheet" href="ovgraphs.css" type="text/css">
|
||||
\endhtmlonly
|
||||
|
||||
|
||||
\htmlonly
|
||||
<script src="bert-large-uncased-whole-word-masking-squad-int8-0001-ov-2021-2-170.js" id="bert-large-uncased-whole-word-masking-squad-int8-0001-ov-2021-2-170"></script>
|
||||
<script src="bert-large-uncased-whole-word-masking-squad-int8-0001-ov-2021-1-096.js" id="bert-large-uncased-whole-word-masking-squad-int8-0001-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="deeplabv3-tf-ov-2021-2-170.js" id="deeplabv3-tf-ov-2021-2-170"></script>
|
||||
<script src="deeplabv3-tf-ov-2021-1-096.js" id="deeplabv3-tf-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="densenet-121-tf-ov-2021-2-170.js" id="densenet-121-tf-ov-2021-2-170"></script>
|
||||
<script src="densenet-121-tf-ov-2021-1-096.js" id="densenet-121-tf-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="faster-rcnn-resnet50-coco-tf-ov-2021-2-170.js" id="faster-rcnn-resnet50-coco-tf-ov-2021-2-170"></script>
|
||||
<script src="faster-rcnn-resnet50-coco-tf-ov-2021-1-096.js" id="faster-rcnn-resnet50-coco-tf-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="googlenet-v1-tf-ov-2021-2-170.js" id="googlenet-v1-tf-ov-2021-2-170"></script>
|
||||
<script src="googlenet-v1-tf-ov-2021-1-096.js" id="googlenet-v1-tf-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="inception-v3-tf-ov-2021-2-170.js" id="inception-v3-tf-ov-2021-2-170"></script>
|
||||
<script src="inception-v3-tf-ov-2021-1-096.js" id="inception-v3-tf-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="mobilenet-ssd-cf-ov-2021-2-170.js" id="mobilenet-ssd-cf-ov-2021-2-170"></script>
|
||||
<script src="mobilenet-ssd-cf-ov-2021-1-096.js" id="mobilenet-ssd-cf-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="mobilenet-v1-1-0-224-tf-ov-2021-2-170.js" id="mobilenet-v1-1-0-224-tf-ov-2021-2-170"></script>
|
||||
<script src="mobilenet-v1-1-0-224-tf-ov-2021-1-096.js" id="mobilenet-v1-1-0-224-tf-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="mobilenet-v2-pytorch-ov-2021-2-170.js" id="mobilenet-v2-pytorch-ov-2021-2-170"></script>
|
||||
<script src="mobilenet-v2-pytorch-ov-2021-1-096.js" id="mobilenet-v2-pytorch-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="resnet-18-pytorch-ov-2021-2-170.js" id="resnet-18-pytorch-ov-2021-2-170"></script>
|
||||
<script src="resnet-18-pytorch-ov-2021-1-096.js" id="resnet-18-pytorch-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="resnet-50-tf-ov-2021-2-170.js" id="resnet-50-tf-ov-2021-2-170"></script>
|
||||
<script src="resnet-50-tf-ov-2021-1-096.js" id="resnet-50-tf-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
|
||||
\htmlonly
|
||||
<script src="se-resnext-50-cf-ov-2021-2-170.js" id="se-resnext-50-cf-ov-2021-2-170"></script>
|
||||
<script src="se-resnext-50-cf-ov-2021-1-096.js" id="se-resnext-50-cf-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="squeezenet1-1-cf-ov-2021-2-170.js" id="squeezenet1-1-cf-ov-2021-2-170"></script>
|
||||
<script src="squeezenet1-1-cf-ov-2021-1-096.js" id="squeezenet1-1-cf-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
|
||||
\htmlonly
|
||||
<script src="ssd300-cf-ov-2021-2-170.js" id="ssd300-cf-ov-2021-2-170"></script>
|
||||
<script src="ssd300-cf-ov-2021-1-096.js" id="ssd300-cf-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
\htmlonly
|
||||
<script src="yolo-v3-tf-ov-2021-2-170.js" id="yolo-v3-tf-ov-2021-2-170"></script>
|
||||
<script src="yolo-v3-tf-ov-2021-1-096.js" id="yolo-v3-tf-ov-2021-1-096"></script>
|
||||
\endhtmlonly
|
||||
|
||||
|
||||
## Platform Configurations
|
||||
|
||||
Intel® Distribution of OpenVINO™ toolkit performance benchmark numbers are based on release 2021.2.
|
||||
Intel® Distribution of OpenVINO™ toolkit performance benchmark numbers are based on release 2021.1.
|
||||
|
||||
Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at intel.com, or from the OEM or retailer. Performance results are based on testing as of December 9, 2020 and may not reflect all publicly available updates. See configuration disclosure for details. No product can be absolutely secure.
|
||||
Intel technologies’ features and benefits depend on system configuration and may require enabled hardware, software or service activation. Learn more at intel.com, or from the OEM or retailer. Performance results are based on testing as of September 25, 2020 and may not reflect all publicly available security updates. See configuration disclosure for details. No product can be absolutely secure.
|
||||
|
||||
Performance varies by use, configuration and other factors. Learn more at [www.intel.com/PerformanceIndex](https://www.intel.com/PerformanceIndex).
|
||||
Software and workloads used in performance tests may have been optimized for performance only on Intel microprocessors. Performance tests, such as SYSmark and MobileMark, are measured using specific computer systems, components, software, operations and functions. Any change to any of those factors may cause the results to vary. You should consult other information and performance tests to assist you in fully evaluating your contemplated purchases, including the performance of that product when combined with other products. For more complete information, see [Performance Benchmark Test Disclosure](https://www.intel.com/content/www/us/en/benchmarks/benchmark.html).
|
||||
|
||||
Your costs and results may vary.
|
||||
|
||||
© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others.
|
||||
|
||||
Intel optimizations, for Intel compilers or other products, may not optimize to the same degree for non-Intel products.
|
||||
Optimization Notice: Intel’s compilers may or may not optimize to the same degree for non-Intel microprocessors for optimizations that are not unique to Intel microprocessors. These optimizations include SSE2, SSE3, and SSSE3 instruction sets and other optimizations. Intel does not guarantee the availability, functionality, or effectiveness of any optimization on microprocessors not manufactured by Intel. Microprocessor-dependent optimizations in this product are intended for use with Intel microprocessors. Certain optimizations not specific to Intel microarchitecture are reserved for Intel microprocessors. Please refer to the applicable product User and Reference Guides for more information regarding the specific instruction sets covered by this notice. [Notice Revision #2010804](https://software.intel.com/articles/optimization-notice).
|
||||
|
||||
Testing by Intel done on: see test date for each HW platform below.
|
||||
|
||||
**CPU Inference Engines**
|
||||
|
||||
| | Intel® Xeon® E-2124G | Intel® Xeon® W1290P | Intel® Xeon® Silver 4216R |
|
||||
| ------------------------------- | ---------------------- | --------------------------- | ---------------------------- |
|
||||
| Motherboard | ASUS* WS C246 PRO | ASUS* WS W480-ACE | Intel® Server Board S2600STB |
|
||||
| CPU | Intel® Xeon® E-2124G CPU @ 3.40GHz | Intel® Xeon® W-1290P CPU @ 3.70GHz | Intel® Xeon® Silver 4216R CPU @ 2.20GHz |
|
||||
| Hyper Threading | OFF | ON | ON |
|
||||
| Turbo Setting | ON | ON | ON |
|
||||
| Memory | 2 x 16 GB DDR4 2666MHz | 4 x 16 GB DDR4 @ 2666MHz |12 x 32 GB DDR4 2666MHz |
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic |
|
||||
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc. | Intel Corporation |
|
||||
| BIOS Version | 0904 | 607 | SE5C620.86B.02.01.<br>0009.092820190230 |
|
||||
| BIOS Release | April 12, 2019 | May 29, 2020 | September 28, 2019 |
|
||||
| BIOS Settings | Select optimized default settings, <br>save & exit | Select optimized default settings, <br>save & exit | Select optimized default settings, <br>change power policy <br>to "performance", <br>save & exit |
|
||||
| Batch size | 1 | 1 | 1 |
|
||||
| Precision | INT8 | INT8 | INT8 |
|
||||
| Number of concurrent inference requests | 4 | 5 | 32 |
|
||||
| Test Date | December 9, 2020 | December 9, 2020 | December 9, 2020 |
|
||||
| Power dissipation, TDP in Watt | [71](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html#tab-blade-1-0-1) | [125](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) | [125](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) |
|
||||
| CPU Price on September 29, 2020, USD<br>Prices may vary | [213](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html) | [539](https://ark.intel.com/content/www/us/en/ark/products/199336/intel-xeon-w-1290p-processor-20m-cache-3-70-ghz.html) |[1,002](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html) |
|
||||
| | Intel® Xeon® E-2124G | Intel® Xeon® Silver 4216R | Intel® Xeon® Gold 5218T | Intel® Xeon® Platinum 8270 |
|
||||
| ------------------------------- | ----------------------| ---------------------------- | ---------------------------- | ---------------------------- |
|
||||
| Motherboard | ASUS* WS C246 PRO | Intel® Server Board S2600STB | Intel® Server Board S2600STB | Intel® Server Board S2600STB |
|
||||
| CPU | Intel® Xeon® E-2124G CPU @ 3.40GHz | Intel® Xeon® Silver 4216R CPU @ 2.20GHz | Intel® Xeon® Gold 5218T CPU @ 2.10GHz | Intel® Xeon® Platinum 8270 CPU @ 2.70GHz |
|
||||
| Hyper Threading | OFF | ON | ON | ON |
|
||||
| Turbo Setting | ON | ON | ON | ON |
|
||||
| Memory | 2 x 16 GB DDR4 2666MHz| 12 x 32 GB DDR4 2666MHz | 12 x 32 GB DDR4 2666MHz | 12 x 32 GB DDR4 2933MHz |
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic |
|
||||
| BIOS Vendor | American Megatrends Inc.* | Intel Corporation | Intel Corporation | Intel Corporation |
|
||||
| BIOS Version | 0904 | SE5C620.86B.02.01.<br>0009.092820190230 | SE5C620.86B.02.01.<br>0009.092820190230 | SE5C620.86B.02.01.<br>0009.092820190230 |
|
||||
| BIOS Release | April 12, 2019 | September 28, 2019 | September 28, 2019 | September 28, 2019 |
|
||||
| BIOS Settings | Select optimized default settings, <br>save & exit | Select optimized default settings, <br>change power policy <br>to "performance", <br>save & exit | Select optimized default settings, <br>change power policy to "performance", <br>save & exit | Select optimized default settings, <br>change power policy to "performance", <br>save & exit |
|
||||
| Batch size | 1 | 1 | 1 | 1 |
|
||||
| Precision | INT8 | INT8 | INT8 | INT8 |
|
||||
| Number of concurrent inference requests | 4 | 32 | 32 | 52 |
|
||||
| Test Date | September 25, 2020 | September 25, 2020 | September 25, 2020 | September 25, 2020 |
|
||||
| Power dissipation, TDP in Watt | [71](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html#tab-blade-1-0-1) | [125](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | [105](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | [205](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html#tab-blade-1-0-1) |
|
||||
| CPU Price on September 29, 2020, USD<br>Prices may vary | [213](https://ark.intel.com/content/www/us/en/ark/products/134854/intel-xeon-e-2124g-processor-8m-cache-up-to-4-50-ghz.html) | [1,002](https://ark.intel.com/content/www/us/en/ark/products/193394/intel-xeon-silver-4216-processor-22m-cache-2-10-ghz.html) | [1,349](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html) | [7,405](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html) |
|
||||
|
||||
**CPU Inference Engines (continued)**
|
||||
|
||||
| | Intel® Xeon® Gold 5218T | Intel® Xeon® Platinum 8270 |
|
||||
| ------------------------------- | ---------------------------- | ---------------------------- |
|
||||
| Motherboard | Intel® Server Board S2600STB | Intel® Server Board S2600STB |
|
||||
| CPU | Intel® Xeon® Gold 5218T CPU @ 2.10GHz | Intel® Xeon® Platinum 8270 CPU @ 2.70GHz |
|
||||
| Hyper Threading | ON | ON |
|
||||
| Turbo Setting | ON | ON |
|
||||
| Memory | 12 x 32 GB DDR4 2666MHz | 12 x 32 GB DDR4 2933MHz |
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic |
|
||||
| BIOS Vendor | Intel Corporation | Intel Corporation |
|
||||
| BIOS Version | SE5C620.86B.02.01.<br>0009.092820190230 | SE5C620.86B.02.01.<br>0009.092820190230 |
|
||||
| BIOS Release | September 28, 2019 | September 28, 2019 |
|
||||
| BIOS Settings | Select optimized default settings, <br>change power policy to "performance", <br>save & exit | Select optimized default settings, <br>change power policy to "performance", <br>save & exit |
|
||||
| Batch size | 1 | 1 |
|
||||
| Precision | INT8 | INT8 |
|
||||
| Number of concurrent inference requests |32 | 52 |
|
||||
| Test Date | December 9, 2020 | December 9, 2020 |
|
||||
| Power dissipation, TDP in Watt | [105](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html#tab-blade-1-0-1) | [205](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html#tab-blade-1-0-1) |
|
||||
| CPU Price on September 29, 2020, USD<br>Prices may vary | [1,349](https://ark.intel.com/content/www/us/en/ark/products/193953/intel-xeon-gold-5218t-processor-22m-cache-2-10-ghz.html) | [7,405](https://ark.intel.com/content/www/us/en/ark/products/192482/intel-xeon-platinum-8270-processor-35-75m-cache-2-70-ghz.html) |
|
||||
|
||||
|
||||
**CPU Inference Engines (continued)**
|
||||
|
||||
| | Intel® Core™ i7-8700T | Intel® Core™ i9-10920X | Intel® Core™ i9-10900TE<br>(iEi Flex BX210AI)| 11th Gen Intel® Core™ i7-1185G7 |
|
||||
| -------------------- | ----------------------------------- |--------------------------------------| ---------------------------------------------|---------------------------------|
|
||||
| Motherboard | GIGABYTE* Z370M DS3H-CF | ASUS* PRIME X299-A II | iEi / B595 | Intel Corporation<br>internal/Reference<br>Validation Platform |
|
||||
| CPU | Intel® Core™ i7-8700T CPU @ 2.40GHz | Intel® Core™ i9-10920X CPU @ 3.50GHz | Intel® Core™ i9-10900TE CPU @ 1.80GHz | 11th Gen Intel® Core™ i7-1185G7 @ 3.00GHz |
|
||||
| Hyper Threading | ON | ON | ON | ON |
|
||||
| Turbo Setting | ON | ON | ON | ON |
|
||||
| Memory | 4 x 16 GB DDR4 2400MHz | 4 x 16 GB DDR4 2666MHz | 2 x 8 GB DDR4 @ 2400MHz | 2 x 8 GB DDR4 3200MHz |
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.8.0-05-generic | 5.8.0-05-generic |
|
||||
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | American Megatrends Inc.* | Intel Corporation |
|
||||
| BIOS Version | F11 | 505 | Z667AR10 | TGLSFWI1.R00.3425.<br>A00.2010162309 |
|
||||
| BIOS Release | March 13, 2019 | December 17, 2019 | July 15, 2020 | October 16, 2020 |
|
||||
| BIOS Settings | Select optimized default settings, <br>set OS type to "other", <br>save & exit | Default Settings | Default Settings | Default Settings |
|
||||
| Batch size | 1 | 1 | 1 | 1 |
|
||||
| Precision | INT8 | INT8 | INT8 | INT8 |
|
||||
| Number of concurrent inference requests |4 | 24 | 5 | 4 |
|
||||
| Test Date | December 9, 2020 | December 9, 2020 | December 9, 2020 | December 9, 2020 |
|
||||
| Power dissipation, TDP in Watt | [35](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html#tab-blade-1-0-1) | [165](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [35](https://ark.intel.com/content/www/us/en/ark/products/203901/intel-core-i9-10900te-processor-20m-cache-up-to-4-60-ghz.html) | [28](https://ark.intel.com/content/www/us/en/ark/products/208081/intel-core-i5-1145g7e-processor-8m-cache-up-to-4-10-ghz.html) |
|
||||
| CPU Price on September 29, 2020, USD<br>Prices may vary | [303](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html) | [700](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [444](https://ark.intel.com/content/www/us/en/ark/products/203901/intel-core-i9-10900te-processor-20m-cache-up-to-4-60-ghz.html) | [426](https://mysamples.intel.com/SAM_U_Product/ProductDetail.aspx?InputMMID=99A3D1&RequestID=0&ProductID=1213750) |
|
||||
|
||||
|
||||
**CPU Inference Engines (continued)**
|
||||
|
||||
| | Intel® Core™ i5-8500 | Intel® Core™ i5-10500TE | Intel® Core™ i5-10500TE<br>(iEi Flex-BX210AI)|
|
||||
| -------------------- | ---------------------------------- | ----------------------------------- |-------------------------------------- |
|
||||
| Motherboard | ASUS* PRIME Z370-A | GIGABYTE* Z490 AORUS PRO AX | iEi / B595 |
|
||||
| CPU | Intel® Core™ i5-8500 CPU @ 3.00GHz | Intel® Core™ i5-10500TE CPU @ 2.30GHz | Intel® Core™ i5-10500TE CPU @ 2.30GHz |
|
||||
| Hyper Threading | OFF | ON | ON |
|
||||
| Turbo Setting | ON | ON | ON |
|
||||
| Memory | 2 x 16 GB DDR4 2666MHz | 2 x 16 GB DDR4 @ 2666MHz | 1 x 8 GB DDR4 @ 2400MHz |
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.3.0-24-generic | 5.3.0-24-generic |
|
||||
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | American Megatrends Inc.* |
|
||||
| BIOS Version | 2401 | F3 | Z667AR10 |
|
||||
| BIOS Release | July 12, 2019 | March 25, 2020 | July 17, 2020 |
|
||||
| BIOS Settings | Select optimized default settings, <br>save & exit | Select optimized default settings, <br>set OS type to "other", <br>save & exit | Default Settings |
|
||||
| Batch size | 1 | 1 | 1 |
|
||||
| Precision | INT8 | INT8 | INT8 |
|
||||
| Number of concurrent inference requests | 3 | 4 | 4 |
|
||||
| Test Date | December 9, 2020 | December 9, 2020 | December 9, 2020 |
|
||||
| Power dissipation, TDP in Watt | [65](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html#tab-blade-1-0-1)| [35](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) | [35](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) |
|
||||
| CPU Price on September 29, 2020, USD<br>Prices may vary | [192](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html) | [195](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) | [195](https://ark.intel.com/content/www/us/en/ark/products/203891/intel-core-i5-10500te-processor-12m-cache-up-to-3-70-ghz.html) |
|
||||
|
||||
| | Intel® Core™ i5-8500 | Intel® Core™ i7-8700T | Intel® Core™ i9-10920X | 11th Gen Intel® Core™ i5-1145G7E |
|
||||
| -------------------- | ---------------------------------- | ----------------------------------- |--------------------------------------|-----------------------------------|
|
||||
| Motherboard | ASUS* PRIME Z370-A | GIGABYTE* Z370M DS3H-CF | ASUS* PRIME X299-A II | Intel Corporation<br>internal/Reference Validation Platform |
|
||||
| CPU | Intel® Core™ i5-8500 CPU @ 3.00GHz | Intel® Core™ i7-8700T CPU @ 2.40GHz | Intel® Core™ i9-10920X CPU @ 3.50GHz | 11th Gen Intel® Core™ i5-1145G7E @ 2.60GHz |
|
||||
| Hyper Threading | OFF | ON | ON | ON |
|
||||
| Turbo Setting | ON | ON | ON | ON |
|
||||
| Memory | 2 x 16 GB DDR4 2666MHz | 4 x 16 GB DDR4 2400MHz | 4 x 16 GB DDR4 2666MHz | 2 x 8 GB DDR4 3200MHz |
|
||||
| Operating System | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS | Ubuntu* 18.04 LTS |
|
||||
| Kernel Version | 5.3.0-24-generic | 5.0.0-23-generic | 5.0.0-23-generic | 5.8.0-05-generic |
|
||||
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* | American Megatrends Inc.* | Intel Corporation |
|
||||
| BIOS Version | 2401 | F11 | 505 | TGLIFUI1.R00.3243.A04.2006302148 |
|
||||
| BIOS Release | July 12, 2019 | March 13, 2019 | December 17, 2019 | June 30, 2020 |
|
||||
| BIOS Settings | Select optimized default settings, <br>save & exit | Select optimized default settings, <br>set OS type to "other", <br>save & exit | Default Settings | Default Settings |
|
||||
| Batch size | 1 | 1 | 1 | 1 |
|
||||
| Precision | INT8 | INT8 | INT8 | INT8 |
|
||||
| Number of concurrent inference requests | 3 | 4 | 24 | 4 |
|
||||
| Test Date | September 25, 2020 | September 25, 2020 | September 25, 2020 | September 25, 2020 |
|
||||
| Power dissipation, TDP in Watt | [65](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html#tab-blade-1-0-1) | [35](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html#tab-blade-1-0-1) | [165](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [28](https://ark.intel.com/content/www/us/en/ark/products/208081/intel-core-i5-1145g7e-processor-8m-cache-up-to-4-10-ghz.html) |
|
||||
| CPU Price on September 29, 2020, USD<br>Prices may vary | [192](https://ark.intel.com/content/www/us/en/ark/products/129939/intel-core-i5-8500-processor-9m-cache-up-to-4-10-ghz.html) | [303](https://ark.intel.com/content/www/us/en/ark/products/129948/intel-core-i7-8700t-processor-12m-cache-up-to-4-00-ghz.html) | [700](https://ark.intel.com/content/www/us/en/ark/products/198012/intel-core-i9-10920x-x-series-processor-19-25m-cache-3-50-ghz.html) | [309](https://mysamples.intel.com/SAM_U_Product/ProductDetail.aspx?InputMMID=99A3D1&RequestID=0&ProductID=1213750) |
|
||||
|
||||
**CPU Inference Engines (continued)**
|
||||
|
||||
@@ -213,7 +165,7 @@ Testing by Intel done on: see test date for each HW platform below.
|
||||
| Batch size | 1 | 1 |
|
||||
| Precision | INT8 | INT8 |
|
||||
| Number of concurrent inference requests | 4 | 4 |
|
||||
| Test Date | December 9, 2020 | December 9, 2020 |
|
||||
| Test Date | September 25, 2020 | September 25, 2020 |
|
||||
| Power dissipation, TDP in Watt | [9.5](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) | [65](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html#tab-blade-1-0-1)|
|
||||
| CPU Price on September 29, 2020, USD<br>Prices may vary | [34](https://ark.intel.com/content/www/us/en/ark/products/96485/intel-atom-x5-e3940-processor-2m-cache-up-to-1-80-ghz.html) | [117](https://ark.intel.com/content/www/us/en/ark/products/126688/intel-core-i3-8100-processor-6m-cache-3-60-ghz.html) |
|
||||
|
||||
@@ -221,7 +173,7 @@ Testing by Intel done on: see test date for each HW platform below.
|
||||
|
||||
**Accelerator Inference Engines**
|
||||
|
||||
| | Intel® Neural Compute Stick 2 | Intel® Vision Accelerator Design<br>with Intel® Movidius™ VPUs (Mustang-V100-MX8) |
|
||||
| | Intel® Neural Compute Stick 2 | Intel® Vision Accelerator Design<br>with Intel® Movidius™ VPUs (Uzel* UI-AR8) |
|
||||
| --------------------------------------- | ------------------------------------- | ------------------------------------- |
|
||||
| VPU | 1 X Intel® Movidius™ Myriad™ X MA2485 | 8 X Intel® Movidius™ Myriad™ X MA2485 |
|
||||
| Connection | USB 2.0/3.0 | PCIe X4 |
|
||||
@@ -229,7 +181,7 @@ Testing by Intel done on: see test date for each HW platform below.
|
||||
| Precision | FP16 | FP16 |
|
||||
| Number of concurrent inference requests | 4 | 32 |
|
||||
| Power dissipation, TDP in Watt | 2.5 | [30](https://www.mouser.com/ProductDetail/IEI/MUSTANG-V100-MX8-R10?qs=u16ybLDytRaZtiUUvsd36w%3D%3D) |
|
||||
| CPU Price, USD<br>Prices may vary | [69](https://ark.intel.com/content/www/us/en/ark/products/140109/intel-neural-compute-stick-2.html) (from December 9, 2020) | [214](https://www.arrow.com/en/products/mustang-v100-mx8-r10/iei-technology?gclid=Cj0KCQiA5bz-BRD-ARIsABjT4ng1v1apmxz3BVCPA-tdIsOwbEjTtqnmp_rQJGMfJ6Q2xTq6ADtf9OYaAhMUEALw_wcB) (from December 9, 2020) |
|
||||
| CPU Price, USD<br>Prices may vary | [69](https://ark.intel.com/content/www/us/en/ark/products/140109/intel-neural-compute-stick-2.html) (from September 29, 2020) | [768](https://www.mouser.com/ProductDetail/IEI/MUSTANG-V100-MX8-R10?qs=u16ybLDytRaZtiUUvsd36w%3D%3D) (from May 15, 2020) |
|
||||
| Host Computer | Intel® Core™ i7 | Intel® Core™ i5 |
|
||||
| Motherboard | ASUS* Z370-A II | Uzelinfo* / US-E1300 |
|
||||
| CPU | Intel® Core™ i7-8700 CPU @ 3.20GHz | Intel® Core™ i5-6600 CPU @ 3.30GHz |
|
||||
@@ -241,9 +193,9 @@ Testing by Intel done on: see test date for each HW platform below.
|
||||
| BIOS Vendor | American Megatrends Inc.* | American Megatrends Inc.* |
|
||||
| BIOS Version | 411 | 5.12 |
|
||||
| BIOS Release | September 21, 2018 | September 21, 2018 |
|
||||
| Test Date | December 9, 2020 | December 9, 2020 |
|
||||
| Test Date | September 25, 2020 | September 25, 2020 |
|
||||
|
||||
Please follow this link for more detailed configuration descriptions: [Configuration Details](https://docs.openvinotoolkit.org/resources/benchmark_files/system_configurations_2021.2.html)
|
||||
Please follow this link for more detailed configuration descriptions: [Configuration Details](https://docs.openvinotoolkit.org/resources/benchmark_files/system_configurations_2021.1.html)
|
||||
|
||||
\htmlonly
|
||||
<style>
|
||||
@@ -254,7 +206,7 @@ Please follow this link for more detailed configuration descriptions: [Configura
|
||||
<div class="opt-notice-wrapper">
|
||||
<p class="opt-notice">
|
||||
\endhtmlonly
|
||||
Results may vary. For workloads and configurations visit: [www.intel.com/PerformanceIndex](https://www.intel.com/PerformanceIndex) and [Legal Information](../Legal_Information.md).
|
||||
For more complete information about performance and benchmark results, visit: [www.intel.com/benchmarks](https://www.intel.com/benchmarks) and [Optimization Notice](https://software.intel.com/articles/optimization-notice). [Legal Information](../Legal_Information.md).
|
||||
\htmlonly
|
||||
</p>
|
||||
</div>
|
||||
|
||||
@@ -49,7 +49,7 @@ Intel partners with various vendors all over the world. Visit the [Intel® AI: I
|
||||
We published a set of guidelines and recommendations for optimizing your models, available in an [introductory](../IE_DG/Intro_to_Performance.md) guide and an [advanced](../optimization_guide/dldt_optimization_guide.md) guide. For further support, please join the conversation in the [Community Forum](https://software.intel.com/en-us/forums/intel-distribution-of-openvino-toolkit).
|
||||
|
||||
#### 9. Why are INT8 optimized models used for benchmarking on CPUs with no VNNI support?
|
||||
The benefit of low-precision optimization using the OpenVINO™ toolkit model optimizer extends beyond processors supporting VNNI through Intel® DL Boost. The reduced bit width of INT8 compared to FP32 allows Intel® CPUs to process the data faster, and thus offers better throughput on any converted model, regardless of which low-precision optimizations the Intel® hardware supports intrinsically. Please refer to [INT8 vs. FP32 Comparison on Select Networks and Platforms](performance_int8_vs_fp32.md) for a comparison of boost factors for different network models and a selection of Intel® CPU architectures, including AVX-2 with Intel® Core™ i7-8700T, and AVX-512 (VNNI) with Intel® Xeon® 5218T and Intel® Xeon® 8270.
|
||||
The benefit of low-precision optimization using the OpenVINO™ toolkit model optimizer extends beyond processors supporting VNNI through Intel® DL Boost. The reduced bit width of INT8 compared to FP32 allows Intel® CPUs to process the data faster, and thus offers better throughput on any converted model, regardless of which low-precision optimizations the Intel® hardware supports intrinsically. Please refer to [INT8 vs. FP32 Comparison on Select Networks and Platforms](./performance_int8_vs_fp32.html) for a comparison of boost factors for different network models and a selection of Intel® CPU architectures, including AVX-2 with Intel® Core™ i7-8700T, and AVX-512 (VNNI) with Intel® Xeon® 5218T and Intel® Xeon® 8270.
|
||||
|
||||
#### 10. Previous releases included benchmarks on googlenet-v1-CF (Caffe). Why are there no longer benchmarks on this neural network model?
|
||||
We replaced googlenet-v1-CF with resnet-18-pytorch due to changes in developer usage. The public model resnet-18 is used by many developers as an Image Classification model. This pre-optimized model was also trained on the ImageNet database, similar to googlenet-v1-CF. Both googlenet-v1-CF and resnet-18 will remain part of the Open Model Zoo. Developers are encouraged to utilize resnet-18-pytorch for Image Classification use cases.
|
||||
|
||||
@@ -9,36 +9,40 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<th>Intel® Core™ <br>i7-8700T</th>
|
||||
<th>Intel® Xeon® <br>Gold <br>5218T</th>
|
||||
<th>Intel® Xeon® <br>Platinum <br>8270</th>
|
||||
<th>Intel® Core™ <br>i7-1185G7</th>
|
||||
<th>Intel® Core™ <br>i7-1065G7</th>
|
||||
<th>Intel® Core™ <br>i5-1145G7E</th>
|
||||
</tr>
|
||||
<tr align="left">
|
||||
<th>OpenVINO <br>benchmark <br>model name</th>
|
||||
<th>Dataset</th>
|
||||
<th colspan="3" align="center">Throughput speed-up FP16-INT8 vs FP32</th>
|
||||
<th colspan="4" align="center">Throughput speed-up FP16-INT8 vs FP32</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>bert-large-<br>uncased-whole-word-<br>masking-squad-0001</td>
|
||||
<td>SQuAD</td>
|
||||
<td>1.6</td>
|
||||
<td>2.7</td>
|
||||
<td>2.5</td>
|
||||
<td>2.0</td>
|
||||
<td>2.6</td>
|
||||
<td>N/A</td>
|
||||
<td>2.8</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>brain-tumor-<br>segmentation-<br>0001-MXNET</td>
|
||||
<td>BraTS</td>
|
||||
<td>1.5</td>
|
||||
<td>1.9</td>
|
||||
<td>1.7</td>
|
||||
<td>1.6</td>
|
||||
<td>1.9</td>
|
||||
<td>1.8</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>deeplabv3-TF</td>
|
||||
<td>VOC 2012<br>Segmentation</td>
|
||||
<td>1.5</td>
|
||||
<td>1.4</td>
|
||||
<td>2.4</td>
|
||||
<td>2.6</td>
|
||||
<td>2.8</td>
|
||||
<td>3.1</td>
|
||||
<td>2.9</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>densenet-121-TF</td>
|
||||
@@ -46,6 +50,7 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<td>1.6</td>
|
||||
<td>3.2</td>
|
||||
<td>3.2</td>
|
||||
<td>3.0</td>
|
||||
<td>3.2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
@@ -54,15 +59,17 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<td>2.0</td>
|
||||
<td>3.6</td>
|
||||
<td>3.5</td>
|
||||
<td>3.4</td>
|
||||
<td>3.2</td>
|
||||
<td>3.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>faster_rcnn_<br>resnet50_coco-TF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>1.7</td>
|
||||
<td>3.5</td>
|
||||
<td>3.4</td>
|
||||
<td>3.4</td>
|
||||
<td>3.4</td>
|
||||
<td>3.6</td>
|
||||
<td>3.6</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>googlenet-v1-TF</td>
|
||||
@@ -71,6 +78,7 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<td>3.6</td>
|
||||
<td>3.7</td>
|
||||
<td>3.5</td>
|
||||
<td>3.6</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>inception-v3-TF</td>
|
||||
@@ -78,38 +86,43 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<td>1.8</td>
|
||||
<td>3.8</td>
|
||||
<td>4.0</td>
|
||||
<td>3.5</td>
|
||||
<td>3.7</td>
|
||||
<td>3.7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-<br>ssd-CF</td>
|
||||
<td>VOC2012</td>
|
||||
<td>1.5</td>
|
||||
<td>3.0</td>
|
||||
<td>3.3</td>
|
||||
<td>3.1</td>
|
||||
<td>3.6</td>
|
||||
<td>3.1</td>
|
||||
<td>3.3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-v1-1.0-<br>224-TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>1.5</td>
|
||||
<td>3.2</td>
|
||||
<td>4.1</td>
|
||||
<td>3.1</td>
|
||||
<td>3.9</td>
|
||||
<td>2.9</td>
|
||||
<td>3.2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-v2-1.0-<br>224-TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>1.3</td>
|
||||
<td>2.7</td>
|
||||
<td>4.3</td>
|
||||
<td>3.8</td>
|
||||
<td>2.2</td>
|
||||
<td>2.5</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-v2-<br>pytorch</td>
|
||||
<td>ImageNet</td>
|
||||
<td>1.4</td>
|
||||
<td>2.8</td>
|
||||
<td>4.6</td>
|
||||
<td>2.6</td>
|
||||
<td>3.6</td>
|
||||
<td>2.3</td>
|
||||
<td>2.4</td>
|
||||
</tr>
|
||||
<tr>
|
||||
@@ -119,54 +132,61 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<td>3.7</td>
|
||||
<td>3.8</td>
|
||||
<td>3.6</td>
|
||||
<td>3.6</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>resnet-50-<br>pytorch</td>
|
||||
<td>ImageNet</td>
|
||||
<td>1.8</td>
|
||||
<td>3.6</td>
|
||||
<td>3.9</td>
|
||||
<td>3.4</td>
|
||||
<td>3.8</td>
|
||||
<td>3.5</td>
|
||||
<td>3.6</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>resnet-50-<br>TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>1.8</td>
|
||||
<td>3.6</td>
|
||||
<td>3.9</td>
|
||||
<td>3.5</td>
|
||||
<td>3.8</td>
|
||||
<td>3.4</td>
|
||||
<td>4.0</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>squeezenet1.1-<br>CF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>1.6</td>
|
||||
<td>2.9</td>
|
||||
<td>3.4</td>
|
||||
<td>3.2</td>
|
||||
<td>3.0</td>
|
||||
<td>3.2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ssd_mobilenet_<br>v1_coco-tf</td>
|
||||
<td>VOC2012</td>
|
||||
<td>1.6</td>
|
||||
<td>3.1</td>
|
||||
<td>3.7</td>
|
||||
<td>3.0</td>
|
||||
<td>3.4</td>
|
||||
<td>3.1</td>
|
||||
<td>3.3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ssd300-CF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>1.8</td>
|
||||
<td>3.7</td>
|
||||
<td>3.7</td>
|
||||
<td>3.6</td>
|
||||
<td>3.8</td>
|
||||
<td>4.0</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ssdlite_<br>mobilenet_<br>v2-TF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>1.4</td>
|
||||
<td>2.3</td>
|
||||
<td>3.9</td>
|
||||
<td>2.5</td>
|
||||
<td>3.1</td>
|
||||
<td>2.4</td>
|
||||
<td>2.6</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>yolo_v3-TF</td>
|
||||
@@ -174,7 +194,8 @@ The table below illustrates the speed-up factor for the performance gain by swit
|
||||
<td>1.8</td>
|
||||
<td>3.8</td>
|
||||
<td>3.9</td>
|
||||
<td>3.6</td>
|
||||
<td>3.7</td>
|
||||
<td>3.8</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
@@ -187,14 +208,21 @@ The following table shows the absolute accuracy drop that is calculated as the d
|
||||
<th></th>
|
||||
<th>Intel® Core™ <br>i9-10920X CPU<br>@ 3.50GHz (VNNI)</th>
|
||||
<th>Intel® Core™ <br>i9-9820X CPU<br>@ 3.30GHz (AVX512)</th>
|
||||
<th>Intel® Core™ <br>i7-6700 CPU<br>@ 4.0GHz (AVX2)</th>
|
||||
<th>Intel® Core™ <br>i7-1185G7 CPU<br>@ 4.0GHz (TGL VNNI)</th>
|
||||
<th>Intel® Core™ <br>i7-8700 CPU<br>@ 3.20GHz (AVX2)</th>
|
||||
</tr>
|
||||
<tr align="left">
|
||||
<th>OpenVINO Benchmark <br>Model Name</th>
|
||||
<th>Dataset</th>
|
||||
<th>Metric Name</th>
|
||||
<th colspan="4" align="center">Absolute Accuracy Drop, %</th>
|
||||
<th colspan="3" align="center">Absolute Accuracy Drop, %</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>bert-large-<br>uncased-whole-word-<br>masking-squad-0001</td>
|
||||
<td>SQuAD</td>
|
||||
<td>F1</td>
|
||||
<td>0.65</td>
|
||||
<td>0.57</td>
|
||||
<td>0.83</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>brain-tumor-<br>segmentation-<br>0001-MXNET</td>
|
||||
@@ -202,26 +230,23 @@ The following table shows the absolute accuracy drop that is calculated as the d
|
||||
<td>Dice-index@ <br>Mean@ <br>Overall Tumor</td>
|
||||
<td>0.08</td>
|
||||
<td>0.08</td>
|
||||
<td>0.08</td>
|
||||
<td>0.08</td>
|
||||
<td>0.9</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>deeplabv3-TF</td>
|
||||
<td>VOC 2012<br>Segmentation</td>
|
||||
<td>mean_iou</td>
|
||||
<td>0.73</td>
|
||||
<td>1.10</td>
|
||||
<td>1.10</td>
|
||||
<td>0.73</td>
|
||||
<td>1.11</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>densenet-121-TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>acc@top-1</td>
|
||||
<td>0.73</td>
|
||||
<td>0.72</td>
|
||||
<td>0.72</td>
|
||||
<td>0.73</td>
|
||||
<td>0.74</td>
|
||||
<td>0.74</td>
|
||||
<td>0.76</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>facenet-<br>20180408-<br>102900-TF</td>
|
||||
@@ -230,25 +255,22 @@ The following table shows the absolute accuracy drop that is calculated as the d
|
||||
<td>0.02</td>
|
||||
<td>0.02</td>
|
||||
<td>0.02</td>
|
||||
<td>0.47</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>faster_rcnn_<br>resnet50_coco-TF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>coco_<br>precision</td>
|
||||
<td>0.21</td>
|
||||
<td>0.20</td>
|
||||
<td>0.20</td>
|
||||
<td>0.21</td>
|
||||
<td>0.20</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>googlenet-v1-TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>acc@top-1</td>
|
||||
<td>0.03</td>
|
||||
<td>0.01</td>
|
||||
<td>0.01</td>
|
||||
<td>0.03</td>
|
||||
<td>0.01</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>inception-v3-TF</td>
|
||||
@@ -257,7 +279,6 @@ The following table shows the absolute accuracy drop that is calculated as the d
|
||||
<td>0.03</td>
|
||||
<td>0.01</td>
|
||||
<td>0.01</td>
|
||||
<td>0.03</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-<br>ssd-CF</td>
|
||||
@@ -266,7 +287,6 @@ The following table shows the absolute accuracy drop that is calculated as the d
|
||||
<td>0.35</td>
|
||||
<td>0.34</td>
|
||||
<td>0.34</td>
|
||||
<td>0.35</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-v1-1.0-<br>224-TF</td>
|
||||
@@ -275,25 +295,22 @@ The following table shows the absolute accuracy drop that is calculated as the d
|
||||
<td>0.27</td>
|
||||
<td>0.20</td>
|
||||
<td>0.20</td>
|
||||
<td>0.27</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-v2-1.0-<br>224-TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>acc@top-1</td>
|
||||
<td>0.44</td>
|
||||
<td>0.92</td>
|
||||
<td>0.92</td>
|
||||
<td>0.44</td>
|
||||
<td>0.45</td>
|
||||
<td>0.94</td>
|
||||
<td>0.94</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mobilenet-v2-<br>PYTORCH</td>
|
||||
<td>ImageNet</td>
|
||||
<td>acc@top-1</td>
|
||||
<td>0.25</td>
|
||||
<td>7.42</td>
|
||||
<td>7.42</td>
|
||||
<td>0.25</td>
|
||||
<td>0.35</td>
|
||||
<td>0.63</td>
|
||||
<td>0.63</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>resnet-18-<br>pytorch</td>
|
||||
@@ -302,7 +319,6 @@ The following table shows the absolute accuracy drop that is calculated as the d
|
||||
<td>0.26</td>
|
||||
<td>0.25</td>
|
||||
<td>0.25</td>
|
||||
<td>0.26</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>resnet-50-<br>PYTORCH</td>
|
||||
@@ -311,65 +327,58 @@ The following table shows the absolute accuracy drop that is calculated as the d
|
||||
<td>0.18</td>
|
||||
<td>0.19</td>
|
||||
<td>0.19</td>
|
||||
<td>0.18</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>resnet-50-<br>TF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>acc@top-1</td>
|
||||
<td>0.15</td>
|
||||
<td>0.11</td>
|
||||
<td>0.11</td>
|
||||
<td>0.15</td>
|
||||
<td>0.10</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>squeezenet1.1-<br>CF</td>
|
||||
<td>ImageNet</td>
|
||||
<td>acc@top-1</td>
|
||||
<td>0.66</td>
|
||||
<td>0.64</td>
|
||||
<td>0.64</td>
|
||||
<td>0.66</td>
|
||||
<td>0.64</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ssd_mobilenet_<br>v1_coco-tf</td>
|
||||
<td>VOC2012</td>
|
||||
<td>COCO mAp</td>
|
||||
<td>0.24</td>
|
||||
<td>3.07</td>
|
||||
<td>3.07</td>
|
||||
<td>0.24</td>
|
||||
<td>3.07</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ssd300-CF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>COCO mAp</td>
|
||||
<td>0.06</td>
|
||||
<td>0.05</td>
|
||||
<td>0.05</td>
|
||||
<td>0.06</td>
|
||||
<td>0.05</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ssdlite_<br>mobilenet_<br>v2-TF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>COCO mAp</td>
|
||||
<td>0.14</td>
|
||||
<td>0.43</td>
|
||||
<td>0.43</td>
|
||||
<td>0.14</td>
|
||||
<td>0.47</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>yolo_v3-TF</td>
|
||||
<td>MS COCO</td>
|
||||
<td>COCO mAp</td>
|
||||
<td>0.12</td>
|
||||
<td>0.35</td>
|
||||
<td>0.35</td>
|
||||
<td>0.12</td>
|
||||
<td>0.20</td>
|
||||
<td>0.20</td>
|
||||
<td>0.36</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||

|
||||

|
||||
|
||||
\htmlonly
|
||||
<style>
|
||||
|
||||
@@ -37,15 +37,17 @@
|
||||
</tab>
|
||||
<tab type="user" title="Model Optimizations Techniques" url="@ref openvino_docs_MO_DG_prepare_model_Model_Optimization_Techniques"/>
|
||||
<tab type="user" title="Cutting off Parts of a Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Cutting_Model"/>
|
||||
<tab type="user" title="Sub-graph Replacement in Model Optimizer" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Subgraph_Replacement_Model_Optimizer"/>
|
||||
<tab type="usergroup" title="Sub-graph Replacement in Model Optimizer" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Subgraph_Replacement_Model_Optimizer">
|
||||
<tab type="user" title="[DEPRECATED] Case-Study: Converting SSD models created with the TensorFlow* Object Detection API" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_TensorFlow_SSD_ObjectDetection_API"/>
|
||||
<tab type="user" title="[DEPRECATED] Case-Study: Converting Faster R-CNN models created with the TensorFlow* Object Detection API" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_TensorFlow_Faster_RCNN_ObjectDetection_API"/>
|
||||
</tab>
|
||||
<tab type="user" title="Supported Framework Layers" url="@ref openvino_docs_MO_DG_prepare_model_Supported_Frameworks_Layers"/>
|
||||
<tab type="user" title="[DEPRECATED] IR Notation Reference" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Legacy_IR_Layers_Catalog_Spec"/>
|
||||
<tab type="user" title="IR suitable for INT8 inference" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_IR_suitable_for_INT8_inference"/>
|
||||
</tab>
|
||||
<tab type="usergroup" title="Model Optimizer Extensibility" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Customize_Model_Optimizer">
|
||||
<tab type="usergroup" title="Custom Layers in Model Optimizer" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Customize_Model_Optimizer">
|
||||
<tab type="user" title="Extending Model Optimizer with New Primitives" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_Model_Optimizer_with_New_Primitives"/>
|
||||
<tab type="user" title="Extending Model Optimizer with Caffe* Python Layers" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_Model_Optimizer_With_Caffe_Python_Layers"/>
|
||||
<tab type="user" title="Extending Model Optimizer for Custom MXNet* Operations" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_MXNet_Model_Optimizer_with_New_Primitives"/>
|
||||
<tab type="user" title="Extending MXNet Model Optimizer with New Primitives" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Extending_MXNet_Model_Optimizer_with_New_Primitives"/>
|
||||
<tab type="user" title="Legacy Mode for Caffe* Custom Layers" url="@ref openvino_docs_MO_DG_prepare_model_customize_model_optimizer_Legacy_Mode_for_Caffe_Custom_Layers"/>
|
||||
<tab type="user" title="[DEPRECATED] Offloading Sub-Graph Inference" url="https://docs.openvinotoolkit.org/2020.1/_docs_MO_DG_prepare_model_customize_model_optimizer_Offloading_Sub_Graph_Inference.html"/>
|
||||
</tab>
|
||||
@@ -55,10 +57,10 @@
|
||||
</tab>
|
||||
<!-- Model Downloader -->
|
||||
<tab id="model_downloader" type="user" title="Model Downloader" url="@ref omz_tools_downloader_README"/>
|
||||
<!-- Custom Operations Guide -->
|
||||
<tab type="usergroup" title="Custom Operations Guide" url="@ref openvino_docs_HOWTO_Custom_Layers_Guide"></tab>
|
||||
</tab>
|
||||
|
||||
<!-- Custom Layers Guide -->
|
||||
<tab type="usergroup" title="Custom Layers Guide" url="@ref openvino_docs_HOWTO_Custom_Layers_Guide"></tab>
|
||||
</tab>
|
||||
|
||||
<!-- Intermediate Representation and Operations Sets -->
|
||||
<tab id="intermediate_representaton_and_operations_sets" type="usergroup" title="Intermediate Representation and Operations Sets" url="@ref openvino_docs_MO_DG_IR_and_opsets">
|
||||
<tab type="usergroup" title="Available Operations Sets" url="@ref openvino_docs_ops_opset">
|
||||
@@ -79,7 +81,6 @@
|
||||
<tab type="user" title="Atan-1" url="@ref openvino_docs_ops_arithmetic_Atan_1"/>
|
||||
<tab type="user" title="Atanh-3" url="@ref openvino_docs_ops_arithmetic_Atanh_3"/>
|
||||
<tab type="user" title="AvgPool-1" url="@ref openvino_docs_ops_pooling_AvgPool_1"/>
|
||||
<tab type="user" title="BatchNormInference-1" url="@ref openvino_docs_ops_normalization_BatchNormInference_1"/>
|
||||
<tab type="user" title="BatchNormInference-5" url="@ref openvino_docs_ops_normalization_BatchNormInference_5"/>
|
||||
<tab type="user" title="BatchToSpace-2" url="@ref openvino_docs_ops_movement_BatchToSpace_2"/>
|
||||
<tab type="user" title="BinaryConvolution-1" url="@ref openvino_docs_ops_convolution_BinaryConvolution_1"/>
|
||||
@@ -260,7 +261,6 @@
|
||||
<tab type="usergroup" title="Utilities to Validate Your Converted Model" url="@ref openvino_inference_engine_tools_cross_check_tool_README">
|
||||
<tab type="user" title="Using Cross Check Tool for Per-Layer Comparison Between Plugins" url="@ref openvino_inference_engine_tools_cross_check_tool_README"/>
|
||||
</tab>
|
||||
<tab type="user" title="Introduction to OpenVINO state API" url="@ref openvino_docs_IE_DG_network_state_intro"/>
|
||||
<tab type="usergroup" title="Supported Devices" url="@ref openvino_docs_IE_DG_supported_plugins_Supported_Devices">
|
||||
<tab type="usergroup" title="GPU Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_CL_DNN">
|
||||
<tab type="user" title="RemoteBlob API of GPU Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_GPU_RemoteBlob_API"/>
|
||||
@@ -296,7 +296,7 @@
|
||||
|
||||
<!-- Compile Tool -->
|
||||
<tab type="user" title="Compile Tool" url="@ref openvino_inference_engine_tools_compile_tool_README"/>
|
||||
|
||||
|
||||
<!-- API References -->
|
||||
<tab id="api_references" type="usergroup" title="API References">
|
||||
<!-- IE C -->
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
</tab>
|
||||
<tab type="user" title="macOS" url="@ref openvino_docs_install_guides_installing_openvino_macos"/>
|
||||
<tab type="user" title="Raspbian OS" url="@ref openvino_docs_install_guides_installing_openvino_raspbian"/>
|
||||
<tab type="user" title="DL Workbench Installation Guide" url="./workbench_docs_Workbench_DG_Install_Workbench.html"/><!-- Link to the original Workbench topic -->
|
||||
</tab>
|
||||
<!-- Install From Images and Repositories -->
|
||||
<tab type="usergroup" title="Install From Images and Repositories" url="@ref openvino_docs_install_guides_installing_openvino_images">
|
||||
@@ -38,7 +39,6 @@
|
||||
<tab type="user" title="Linux" url="@ref openvino_docs_get_started_get_started_linux"/>
|
||||
<tab type="user" title="Windows" url="@ref openvino_docs_get_started_get_started_windows"/>
|
||||
<tab type="user" title="macOS" url="@ref openvino_docs_get_started_get_started_macos"/>
|
||||
<tab type="user" title="Raspbian" url="@ref openvino_docs_get_started_get_started_raspbian"/>
|
||||
<tab type="user" title="Get Started with OpenVINO via DL Workbench" url="@ref openvino_docs_get_started_get_started_dl_workbench"/>
|
||||
<tab type="user" title="Legal Information" url="@ref openvino_docs_Legal_Information"/>
|
||||
<tab type="user" title="Introduction to DL Workbench" url="./openvino_docs_get_started_get_started_dl_workbench.html"/><!-- Link to the original Workbench topic -->
|
||||
@@ -60,7 +60,6 @@
|
||||
<tab type="user" title="Introduction" url="@ref openvino_docs_security_guide_introduction"/>
|
||||
<tab type="user" title="Using DL Workbench Securely" url="@ref openvino_docs_security_guide_workbench"/>
|
||||
<tab type="user" title="Using Encrypted Models" url="@ref openvino_docs_IE_DG_protecting_model_guide"/>
|
||||
<tab type="user" title="Security Add-on" url="@ref ovsa_get_started"/>
|
||||
</tab>
|
||||
</tab>
|
||||
|
||||
@@ -77,7 +76,7 @@
|
||||
<!-- Performance Benchmarks -->
|
||||
<tab type="usergroup" title="Performance Measures" url="@ref openvino_docs_performance_benchmarks">
|
||||
<tab type="user" title="Performance Information Frequently Asked Questions" url="@ref openvino_docs_performance_benchmarks_faq"/>
|
||||
<tab type="user" title="Download Performance Data Spreadsheet in MS Excel* Format" url="https://docs.openvinotoolkit.org/downloads/benchmark_files/OV-2021.2-Download-Excel.xlsx"/>
|
||||
<tab type="user" title="Download Performance Data Spreadsheet in MS Excel* Format" url="https://docs.openvinotoolkit.org/downloads/benchmark_files/OV-2021.1-Download-Excel.xlsx"/>
|
||||
<tab type="user" title="INT8 vs. FP32 Comparison on Select Networks and Platforms" url="@ref openvino_docs_performance_int8_vs_fp32"/>
|
||||
</tab>
|
||||
<tab type="user" title="Performance Optimization Guide" url="@ref openvino_docs_optimization_guide_dldt_optimization_guide"/>
|
||||
@@ -152,8 +151,7 @@
|
||||
<tab type="user" title="Benchmark Sample" url="@ref gst_samples_benchmark_README"/>
|
||||
</tab>
|
||||
<tab type="usergroup" title="Add-Ons" url="">
|
||||
<tab type="user" title="Model Server" url="@ref openvino_docs_ovms"/>
|
||||
<tab type="user" title="Security Add-on" url="./ovsa_get_started.html"/>
|
||||
<tab type="user" title="Model Server" url="@ref openvino_docs_ovms"/>
|
||||
</tab>
|
||||
</tab>
|
||||
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6675f4b68df7eaa3d6188ecc8b5d53be572cf9c92f53abac3bc6416e6b428d0c
|
||||
size 196146
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:539deb67a7d1c0e8b0c037f8e7488445be0895e8e717bed5cfec64131936870c
|
||||
size 198207
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2925e58a71d684e23776e6ed55cc85d9085b3ba5e484720528aeac5fa59f9e3a
|
||||
size 55404
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f4a52661c05977d878c614c4f8510935982ce8a0e120e05690307d7c95e4ab31
|
||||
size 73999
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ddb0550f3f04c177ec116d6c41e6d3a2ac1fedea7121e10ad3836f84c86a5c78
|
||||
size 35278
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f1e329304ff3d586bb2b8e2442333ede085593f40b1567bd5250508d33d3b9f9
|
||||
size 32668
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:605515f25a746579d3622b7a274c7dece95e4fbfc6c1817f99431c1abf116070
|
||||
size 55409
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0ca48900ca8f6733c4a8ebc957517fbed80f3c080f53d251eeebb01f082c8f83
|
||||
size 55646
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ba94c2c0e0cb98b9e43c876d060d8a7965182461b0d505167eb71134d4975b8f
|
||||
size 58204
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:75628b7d02f1fe5c25a233fa16ae1c6c3d5060bf3d15bc7b1e5b9ea71ce50b73
|
||||
size 50227
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:72ab36115cecfee4b215e1b21911ebac3706e513b72eea7bb829932f7bdb3a19
|
||||
size 70515
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:70aee6f0fd30c8e2139950c6bc831dc11b2616ea8f04b991efc9b3f5b7b11ce6
|
||||
size 88891
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c1e297da7f7dfd2af7a0ba47ba1e5c14376f21b15dfcde1fe6f5ad3412ad8feb
|
||||
size 21296
|
||||
@@ -1,140 +0,0 @@
|
||||
# Get Started with OpenVINO™ Toolkit via Deep Learning Workbench {#openvino_docs_get_started_get_started_dl_workbench}
|
||||
|
||||
The OpenVINO™ toolkit optimizes and runs Deep Learning Neural Network models on Intel® hardware. This guide helps you get started with the OpenVINO™ toolkit via the Deep Learning Workbench (DL Workbench) on Linux\*, Windows\*, or macOS\*.
|
||||
|
||||
In this guide, you will:
|
||||
* Learn the OpenVINO™ inference workflow.
|
||||
* Start DL Workbench on Linux. Links to instructions for other operating systems are provided as well.
|
||||
* Create a project and run a baseline inference.
|
||||
|
||||
[DL Workbench](@ref workbench_docs_Workbench_DG_Introduction) is a web-based graphical environment that enables you to easily use various sophisticated
|
||||
OpenVINO™ toolkit components:
|
||||
* [Model Downloader](@ref omz_tools_downloader_README) to download models from the [Intel® Open Model Zoo](@ref omz_models_intel_index)
|
||||
with pretrained models for a range of different tasks
|
||||
* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) to transform models into
|
||||
the Intermediate Representation (IR) format
|
||||
* [Post-Training Optimization toolkit](@ref pot_README) to calibrate a model and then execute it in the
|
||||
INT8 precision
|
||||
* [Accuracy Checker](@ref omz_tools_accuracy_checker_README) to determine the accuracy of a model
|
||||
* [Benchmark Tool](@ref openvino_inference_engine_samples_benchmark_app_README) to estimate inference performance on supported devices
|
||||
|
||||

|
||||
|
||||
DL Workbench supports the following scenarios:
|
||||
1. [Calibrate the model in INT8 precision](@ref workbench_docs_Workbench_DG_Int_8_Quantization)
|
||||
2. [Find the best combination](@ref workbench_docs_Workbench_DG_View_Inference_Results) of inference parameters: [number of streams and batches](../optimization_guide/dldt_optimization_guide.md)
|
||||
3. [Analyze inference results](@ref workbench_docs_Workbench_DG_Visualize_Model) and [compare them across different configurations](@ref workbench_docs_Workbench_DG_Compare_Performance_between_Two_Versions_of_Models)
|
||||
4. [Implement an optimal configuration into your application](@ref workbench_docs_Workbench_DG_Deploy_and_Integrate_Performance_Criteria_into_Application)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Prerequisite | Linux* | Windows* | macOS*
|
||||
:----- | :----- |:----- |:-----
|
||||
Operating system|Ubuntu\* 18.04. Other Linux distributions, such as Ubuntu\* 16.04 and CentOS\* 7, are not validated.|Windows\* 10 | macOS\* 10.15 Catalina
|
||||
CPU | Intel® Core™ i5| Intel® Core™ i5 | Intel® Core™ i5
|
||||
GPU| Intel® Pentium® processor N4200/5 with Intel® HD Graphics | Not supported| Not supported
|
||||
HDDL, Myriad| Intel® Neural Compute Stick 2 <br> Intel® Vision Accelerator Design with Intel® Movidius™ VPUs| Not supported | Not supported
|
||||
Available RAM space| 4 GB| 4 GB| 4 GB
|
||||
Available storage space | 8 GB + space for imported artifacts| 8 GB + space for imported artifacts| 8 GB + space for imported artifacts
|
||||
Docker\*| Docker CE 18.06.1 | Docker Desktop 2.1.0.1|Docker CE 18.06.1
|
||||
Web browser| Google Chrome\* 76 <br> Browsers like Mozilla Firefox\* 71 or Apple Safari\* 12 are not validated. <br> Microsoft Internet Explorer\* is not supported.| Google Chrome\* 76 <br> Browsers like Mozilla Firefox\* 71 or Apple Safari\* 12 are not validated. <br> Microsoft Internet Explorer\* is not supported.| Google Chrome\* 76 <br>Browsers like Mozilla Firefox\* 71 or Apple Safari\* 12 are not validated. <br> Microsoft Internet Explorer\* is not supported.
|
||||
Resolution| 1440 x 890|1440 x 890|1440 x 890
|
||||
Internet|Optional|Optional|Optional
|
||||
Installation method| From Docker Hub <br> From OpenVINO™ toolkit package|From Docker Hub|From Docker Hub
|
||||
|
||||
## Start DL Workbench
|
||||
|
||||
This section provides instructions to run the DL Workbench on Linux from Docker Hub.
|
||||
|
||||
Use the command below to pull the latest Docker image with the application and run it:
|
||||
|
||||
```bash
|
||||
wget https://raw.githubusercontent.com/openvinotoolkit/workbench_aux/master/start_workbench.sh && bash start_workbench.sh
|
||||
```
|
||||
DL Workbench uses [authentication tokens](@ref workbench_docs_Workbench_DG_Authentication) to access the application. A token
|
||||
is generated automatically and displayed in the console output when you run the container for the first time. Once the command is executed, follow the link with the token. The **Get Started** page opens:
|
||||

|
||||
|
||||
For details and more installation options, visit the links below:
|
||||
* [Install DL Workbench from Docker Hub* on Linux* OS](@ref workbench_docs_Workbench_DG_Install_from_DockerHub_Linux)
|
||||
* [Install DL Workbench from Docker Hub on Windows*](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub_Win)
|
||||
* [Install DL Workbench from Docker Hub on macOS*](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub_mac)
|
||||
* [Install DL Workbench from the OpenVINO toolkit package on Linux](@ref workbench_docs_Workbench_DG_Install_from_Package)
|
||||
|
||||
## <a name="workflow-overview"></a>OpenVINO™ DL Workbench Workflow Overview
|
||||
|
||||
The simplified OpenVINO™ DL Workbench workflow is:
|
||||
1. **Get a trained model** for your inference task. Example inference tasks: pedestrian detection, face detection, vehicle detection, license plate recognition, head pose.
|
||||
2. **Run the trained model through the Model Optimizer** to convert the model to an Intermediate Representation, which consists of a pair of `.xml` and `.bin` files that are used as the input for Inference Engine.
|
||||
3. **Run inference against the Intermediate Representation** (optimized model) and output inference results.
|
||||
|
||||
## Run Baseline Inference
|
||||
|
||||
This section illustrates a sample use case of how to infer a pretrained model from the [Intel® Open Model Zoo](@ref omz_models_intel_index) with an autogenerated noise dataset on a CPU device.
|
||||
|
||||
<iframe width="560" height="315" src="https://www.youtube.com/embed/9TRJwEmY0K4" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
|
||||
|
||||
Once you log in to the DL Workbench, create a project, which is a combination of a model, a dataset, and a target device. Follow the steps below:
|
||||
|
||||
### Step 1. Open a New Project
|
||||
|
||||
On the **Active Projects** page, click **Create** to open the **Create Project** page:
|
||||

|
||||
|
||||
### Step 2. Choose a Pretrained Model
|
||||
|
||||
Click **Import** next to the **Model** table on the **Create Project** page. The **Import Model** page opens. Select the squeezenet1.1 model from the Open Model Zoo and click **Import**.
|
||||

|
||||
|
||||
### Step 3. Convert the Model into Intermediate Representation
|
||||
|
||||
The **Convert Model to IR** tab opens. Keep the FP16 precision and click **Convert**.
|
||||

|
||||
|
||||
You are directed back to the **Create Project** page where you can see the status of the chosen model.
|
||||

|
||||
|
||||
### Step 4. Generate a Noise Dataset
|
||||
|
||||
Scroll down to the **Validation Dataset** table. Click **Generate** next to the table heading.
|
||||

|
||||
|
||||
The **Autogenerate Dataset** page opens. Click **Generate**.
|
||||

|
||||
|
||||
You are directed back to the **Create Project** page where you can see the status of the dataset.
|
||||

|
||||
|
||||
### Step 5. Create the Project and Run a Baseline Inference
|
||||
|
||||
On the **Create Project** page, select the imported model, CPU target, and the generated dataset. Click **Create**.
|
||||

|
||||
|
||||
The inference starts and you cannot proceed until it is done.
|
||||

|
||||
|
||||
Once the inference is complete, the **Projects** page opens automatically. Find your inference job in the **Projects Settings** table indicating all jobs.
|
||||

|
||||
|
||||
Congratulations, you have performed your first inference in the OpenVINO DL Workbench. Now you can proceed to:
|
||||
* [Select the inference](@ref workbench_docs_Workbench_DG_Run_Single_Inference)
|
||||
* [Visualize statistics](@ref workbench_docs_Workbench_DG_Visualize_Model)
|
||||
* [Experiment with model optimization](@ref workbench_docs_Workbench_DG_Int_8_Quantization)
|
||||
and inference options to profile the configuration
|
||||
|
||||
For detailed instructions to create a new project, visit the links below:
|
||||
* [Select a model](@ref workbench_docs_Workbench_DG_Select_Model)
|
||||
* [Select a dataset](@ref workbench_docs_Workbench_DG_Select_Datasets)
|
||||
* [Select a target and an environment](@ref workbench_docs_Workbench_DG_Select_Environment). This can be your local workstation or a remote target. If you use a remote target, [register the remote machine](@ref workbench_docs_Workbench_DG_Add_Remote_Target) first.
|
||||
|
||||
## Additional Resources
|
||||
|
||||
* [OpenVINO™ Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes)
|
||||
* [OpenVINO™ Toolkit Overview](../index.md)
|
||||
* [DL Workbench Installation Guide](@ref workbench_docs_Workbench_DG_Install_Workbench)
|
||||
* [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md)
|
||||
* [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
|
||||
* [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md)
|
||||
* [Overview of OpenVINO™ Toolkit Pre-Trained Models](https://software.intel.com/en-us/openvino-toolkit/documentation/pretrained-models)
|
||||
* [OpenVINO™ Hello World Face Detection Exercise](https://github.com/intel-iot-devkit/inference-tutorials-generic)
|
||||
@@ -1,109 +0,0 @@
|
||||
# Get Started with OpenVINO™ Toolkit on Raspbian* OS {#openvino_docs_get_started_get_started_raspbian}
|
||||
|
||||
The OpenVINO™ toolkit optimizes and runs Deep Learning Neural Network models on Intel® hardware. This guide helps you get started with the OpenVINO™ toolkit you installed on Raspbian* OS.
|
||||
|
||||
In this guide, you will:
|
||||
* Learn the OpenVINO™ inference workflow.
|
||||
* Build and run sample code using detailed instructions.
|
||||
|
||||
## <a name="openvino-components"></a>OpenVINO™ Toolkit Components
|
||||
On Raspbian* OS, the OpenVINO™ toolkit consists of the following components:
|
||||
* **Inference Engine:** The software libraries that run inference against the Intermediate Representation (optimized model) to produce inference results.
|
||||
* **MYRIAD Plugin:** The plugin developed for inference of neural networks on Intel® Neural Compute Stick 2.
|
||||
|
||||
> **NOTE**:
|
||||
> * The OpenVINO™ package for Raspberry* does not include the [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). To convert models to Intermediate Representation (IR), you need to install it separately to your host machine.
|
||||
> * The package does not include the Open Model Zoo demo applications. You can download them separately from the [Open Model Zoo repository](https://github.com/opencv/open_model_zoo).
|
||||
|
||||
In addition, [code samples](../IE_DG/Samples_Overview.md) are provided to help you get up and running with the toolkit.
|
||||
|
||||
## <a name="openvino-installation"></a>Intel® Distribution of OpenVINO™ Toolkit Directory Structure
|
||||
This guide assumes you completed all Intel® Distribution of OpenVINO™ toolkit installation and configuration steps. If you have not yet installed and configured the toolkit, see [Install Intel® Distribution of OpenVINO™ toolkit for Raspbian*](../install_guides/installing-openvino-raspbian.md).
|
||||
|
||||
The OpenVINO toolkit for Raspbian* OS is distributed without an installer. This document refers to the directory to which you unpacked the toolkit package as `<INSTALL_DIR>`.
|
||||
|
||||
The primary tools for deploying your models and applications are installed to the `<INSTALL_DIR>/deployment_tools` directory.
|
||||
<details>
|
||||
<summary><strong>Click for the <code>deployment_tools</code> directory structure</strong></summary>
|
||||
|
||||
|
||||
| Directory | Description |
|
||||
|:----------------------------------------|:--------------------------------------------------------------------------------------|
|
||||
| `inference_engine/` | Inference Engine directory. Contains Inference Engine API binaries and source files, samples and extensions source files, and resources like hardware drivers.|
|
||||
| `external/` | Third-party dependencies and drivers.|
|
||||
| `include/` | Inference Engine header files. For API documentation, see the [Inference Engine API Reference](./annotated.html). |
|
||||
| `lib/` | Inference Engine libraries.|
|
||||
| `samples/` | Inference Engine samples. Contains source code for C++ and Python* samples and build scripts. See the [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md). |
|
||||
| `share/` | CMake configuration files for linking with Inference Engine.|
|
||||
|
||||
</details>
|
||||
|
||||
## <a name="workflow-overview"></a>OpenVINO™ Workflow Overview
|
||||
|
||||
The OpenVINO™ workflow on Raspbian* OS is as follows:
|
||||
1. **Get a pre-trained model** for your inference task. If you want to use your model for inference, the model must be converted to the `.bin` and `.xml` Intermediate Representation (IR) files, which are used as input by Inference Engine. On Raspberry Pi, OpenVINO™ toolkit includes only the Inference Engine module. The Model Optimizer is not supported on this platform. To get the optimized models, you can use one of the following options:
|
||||
|
||||
* Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using [Model Downloader tool](@ref omz_tools_downloader_README).
|
||||
<br> For more information on pre-trained models, see [Pre-Trained Models Documentation](@ref omz_models_intel_index)
|
||||
|
||||
* Convert a model using the Model Optimizer from a full installation of Intel® Distribution of OpenVINO™ toolkit on one of the supported platforms. Installation instructions are available:
|
||||
* [Installation Guide for macOS*](../install_guides/installing-openvino-macos.md)
|
||||
* [Installation Guide for Windows*](../install_guides/installing-openvino-windows.md)
|
||||
* [Installation Guide for Linux*](../install_guides/installing-openvino-linux.md)
|
||||
2. **Use the Inference Engine API in the application** to run inference against the Intermediate Representation (optimized model) and output inference results. The application can be an OpenVINO™ sample or your own application.
|
||||
|
||||
## <a name="using-sample"></a>Build and Run Code Samples
|
||||
|
||||
Follow the steps below to run a pre-trained Face Detection network using the Inference Engine samples from the OpenVINO toolkit.
|
||||
|
||||
1. Create a samples build directory. This example uses a directory named `build`:
|
||||
```sh
|
||||
mkdir build && cd build
|
||||
```
|
||||
2. Build the Object Detection Sample with the following command:
|
||||
```sh
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-march=armv7-a" /opt/intel/openvino/deployment_tools/inference_engine/samples/cpp
|
||||
```
|
||||
```sh
|
||||
make -j2 object_detection_sample_ssd
|
||||
```
|
||||
3. Download the pre-trained Face Detection model with the Model Downloader:
|
||||
|
||||
```sh
|
||||
git clone --depth 1 https://github.com/openvinotoolkit/open_model_zoo
|
||||
cd open_model_zoo/tools/downloader
|
||||
python3 -m pip install -r requirements.in
|
||||
python3 downloader.py --name face-detection-adas-0001
|
||||
```
|
||||
|
||||
4. Run the sample, specifying the model and path to the input image:
|
||||
```sh
|
||||
./armv7l/Release/object_detection_sample_ssd -m face-detection-adas-0001.xml -d MYRIAD -i <path_to_image>
|
||||
```
|
||||
The application outputs an image (`out_0.bmp`) with detected faces enclosed in rectangles.
|
||||
|
||||
## <a name="basic-guidelines-sample-application"></a>Basic Guidelines for Using Code Samples
|
||||
|
||||
Following are some basic guidelines for executing the OpenVINO™ workflow using the code samples:
|
||||
|
||||
1. Before using the OpenVINO™ samples, always set up the environment:
|
||||
```sh
|
||||
source <INSTALL_DIR>/bin/setupvars.sh
|
||||
```
|
||||
2. Have the directory path for the following:
|
||||
- Code Sample binaries
|
||||
- Media: Video or image. Many sources are available from which you can download video media to use the code samples and demo applications, like https://videos.pexels.com and https://images.google.com.
|
||||
- Model in the IR format (.bin and .xml files).
|
||||
|
||||
|
||||
## Additional Resources
|
||||
|
||||
Use these resources to learn more about the OpenVINO™ toolkit:
|
||||
|
||||
* [OpenVINO™ Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes)
|
||||
* [OpenVINO™ Toolkit Overview](../index.md)
|
||||
* [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md)
|
||||
* [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
|
||||
* [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md)
|
||||
* [Overview of OpenVINO™ Toolkit Pre-Trained Models](https://software.intel.com/en-us/openvino-toolkit/documentation/pretrained-models)
|
||||
* [OpenVINO™ Hello World Face Detection Exercise](https://github.com/intel-iot-devkit/inference-tutorials-generic)
|
||||
@@ -12,8 +12,8 @@
|
||||
* [Introducing int8 quantization for fast CPU inference using OpenVINO](https://www.intel.ai/introducing-int8-quantization-for-fast-cpu-inference-using-openvino/)
|
||||
* [Accelerate Vision-based AI with Intel® Distribution of OpenVINO™ Toolkit](https://www.intel.ai/accelerate-vision-based-ai-with-intel-distribution-of-openvino-toolkit/)
|
||||
|
||||
## Custom Operations Guide
|
||||
To learn what a *custom operation* is and how to work with custom operations in the Deep Learning Deployment Toolkit, see the [Custom Operations Guide](../HOWTO/Custom_Layers_Guide.md).
|
||||
## Custom Layers Guide
|
||||
To learn what *custom layers* are and how to work with them in the Deep Learning Deployment Toolkit, see the [Custom Layers Guide](../HOWTO/Custom_Layers_Guide.md).
|
||||
|
||||
## Introducing OpenVINO™ and Computer Vision | IoT Developer Show Season 2 | Intel Software
|
||||
|
||||
@@ -65,4 +65,4 @@ To learn about what is *custom operation* and how to work with them in the Deep
|
||||
[performance-boost-dl]: ../img/performance-boost-DL-algorithm.jpg
|
||||
[digital-security-surveillance]: ../img/digital-security-surveillance.jpg
|
||||
[robotics-with-AI]: ../img/robotics-with-AI.jpg
|
||||
[people-counter-syestem]: ../img/people-counter-syestem.jpg
|
||||
[people-counter-syestem]: ../img/people-counter-syestem.jpg
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1e432a8beb8290adec0d498f5c4aff63eed36c7fdf4bade3db0e0b0bdc5ff70f
|
||||
size 26737
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bb7beec6a00edbde13eca331d513f72b35b442f41f3a0c0e84f5185a1dcbf9ec
|
||||
size 50220
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bae3a4992d6d4f157674496b6230ae68f0cc33a58e4f305eeb935c09b5278409
|
||||
size 28518
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c9bc3815efa5c7a2e2b3db4bfdc229b991c86346b086a74e5b02fa512cb3811f
|
||||
size 26772
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:22f39675a2dab15ff466094d623444ae2bf3d9447113bc4528e19ef275932260
|
||||
size 51382
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:86866c2c0a192eaaa78875ef324334d2d50eb488aa5a6ed980cb3b0d21f35953
|
||||
size 26142
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0540bb419eacf351d9e311c8b25c057c0c02f070b8615ae351e7cd21e3a9e020
|
||||
size 25934
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1fd333909d651648152c881122cdc6261e40737a96b2ef4eb291cce9da84e713
|
||||
size 133129
|
||||
@@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0109b9cbc2908f786f6593de335c725f8ce5c800f37a7d79369408cc47eb8471
|
||||
size 25725
|
||||
oid sha256:304869bcbea000f6dbf46dee7900ff01aa61a75a3787969cc307f2f54d57263c
|
||||
size 32185
|
||||
|
||||
@@ -66,7 +66,7 @@ In case of proxy issues, please use an offline installer for Build Tools (follow
|
||||
|
||||
## Run the Docker* Image for CPU
|
||||
|
||||
To install the OpenVINO toolkit from the prepared Docker image, run the image with the following command:
|
||||
To install the OpenVINO toolkit from the prepared Docker image, run the image with the following command (currently, only the CPU target is supported):
|
||||
~~~
|
||||
docker run -it --rm <image_name>
|
||||
~~~
|
||||
@@ -76,64 +76,6 @@ If you want to try some demos then run image with the root privileges (some addi
|
||||
docker run -itu ContainerAdministrator --rm <image_name> cmd /S /C "cd deployment_tools\demo && demo_security_barrier_camera.bat -d CPU -sample-options -no_show"
|
||||
~~~
|
||||
|
||||
## Build and Run the Docker* Image for GPU
|
||||
|
||||
The GPU Acceleration in Windows containers feature requires that the following Windows host, OpenVINO toolkit, and Docker* requirements are met:
|
||||
* [Windows requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/gpu-acceleration):
|
||||
* The container host must be running Windows Server 2019 or Windows 10 of version 1809 or higher.
|
||||
* The container base image must be `mcr.microsoft.com/windows:1809` or higher. Windows Server Core and Nano Server container images are not currently supported.
|
||||
* The container host must be running Docker Engine 19.03 or higher.
|
||||
* The container host must have GPU running display drivers of version WDDM 2.5 or higher.
|
||||
* [OpenVINO™ GPU requirement](https://docs.openvinotoolkit.org/latest/openvino_docs_install_guides_installing_openvino_windows.html#Install-GPU):
|
||||
* Intel Graphics Driver for Windows of version 15.65 or higher.
|
||||
* [Docker isolation mode requirement](https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/hyperv-container):
|
||||
* Windows host and container version tags must match.
|
||||
* [Windows host and container isolation process support](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility)
|
||||
|
||||
## Build a Docker* Image for Your Host System
|
||||
|
||||
1. Reuse one of [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles). You can also use your own Dockerfile.
|
||||
2. Check your [Windows host and container isolation process compatibility](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility).
|
||||
3. Find the appropriate Windows container base image on [DockerHub*](https://hub.docker.com/_/microsoft-windows) and set up your host/container version in the `FROM` Dockerfile instruction.
|
||||
For example, in [openvino_c_dev_2021.dockerfile](https://github.com/openvinotoolkit/docker_ci/blob/master/dockerfiles/winserver2019/openvino_c_dev_2021.dockerfile), change:
|
||||
~~~
|
||||
FROM mcr.microsoft.com/windows/servercore:ltsc2019 AS ov_base
|
||||
~~~
|
||||
to
|
||||
~~~
|
||||
FROM mcr.microsoft.com/windows:20H2
|
||||
~~~
|
||||
4. Build the Docker image
|
||||
~~~
|
||||
docker build --build-arg package_url=<OpenVINO pkg> -f <Dockerfile> -t <image_name> .
|
||||
~~~
|
||||
5. Copy `OpenCL.dll` from your `C:\Windows\System32` host folder to any `temp` directory:
|
||||
~~~
|
||||
mkdir C:\tmp
|
||||
copy C:\Windows\System32\OpenCL.dll C:\tmp
|
||||
~~~
|
||||
|
||||
## Run the Docker* Image for GPU
|
||||
|
||||
1. To try inference on a GPU, run the image with the following command:
|
||||
~~~
|
||||
docker run -it --rm -u ContainerAdministrator --isolation process --device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599 -v C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409:C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409 -v C:\tmp:C:\tmp <image_name>
|
||||
~~~
|
||||
where
|
||||
* `--device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599` is a reserved interface class GUID for a GPU device.
|
||||
* `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409` is the path to the OpenCL driver home directory. To find it on your PC, search for a directory that matches the `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_*` wildcard pattern.
|
||||
* `C:\tmp` is the folder with the copy of `OpenCL.dll` from your `C:\Windows\System32` host folder.
|
||||
|
||||
2. Copy `OpenCL.dll` to the `C:\Windows\System32` folder inside the container and set appropriate registry entry. Now you can run inference on a GPU device:
|
||||
~~~
|
||||
copy C:\tmp\OpenCL.dll C:\Windows\System32\ && reg add "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /v "C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409\ocl\bin\x64\intelocl64.dll" /t REG_DWORD /d 0
|
||||
~~~
|
||||
3. For example, run the `demo_security_barrier_camera` demo with the command below:
|
||||
~~~
|
||||
cd bin && setupvars.bat && cd ../ && cd deployment_tools\demo && demo_security_barrier_camera.bat -d GPU -sample-options -no_show
|
||||
~~~
|
||||
> **NOTE**: Additional third-party dependencies will be installed.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
If you encounter proxy issues, set up the proxy settings for Docker. See the Proxy section in the [Install the DL Workbench from Docker Hub*](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) topic.
|
||||
|
||||
@@ -31,19 +31,6 @@ The Intel® Distribution of OpenVINO™ toolkit for Linux\*:
|
||||
| [Documentation for Pre-Trained Models ](@ref omz_models_intel_index) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/opencv/open_model_zoo). |
|
||||
| Deep Learning Streamer (DL Streamer) | Streaming analytics framework, based on GStreamer, for constructing graphs of media analytics components. For the DL Streamer documentation, see [DL Streamer Samples](@ref gst_samples_README), [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/), [Elements](https://github.com/opencv/gst-video-analytics/wiki/Elements), [Tutorial](https://github.com/opencv/gst-video-analytics/wiki/DL%20Streamer%20Tutorial). |
|
||||
|
||||
**Could Be Optionally Installed**
|
||||
|
||||
[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models on various Intel® architecture
|
||||
configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components:
|
||||
* [Model Downloader](@ref omz_tools_downloader_README)
|
||||
* [Intel® Open Model Zoo](@ref omz_models_intel_index)
|
||||
* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
|
||||
* [Post-training Optimization Tool](@ref pot_README)
|
||||
* [Accuracy Checker](@ref omz_tools_accuracy_checker_README)
|
||||
* [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md)
|
||||
|
||||
Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started.
|
||||
|
||||
## System Requirements
|
||||
|
||||
**Hardware**
|
||||
@@ -84,8 +71,7 @@ This guide provides step-by-step instructions on how to install the Intel® Dist
|
||||
8. <a href="#install-VPU">Steps for Intel® Vision Accelerator Design with Intel® Movidius™ VPU</a><br>
|
||||
After installing your Intel® Movidius™ VPU, you will return to this guide to complete OpenVINO™ installation.
|
||||
9. <a href="#run-a-sample">Run a Sample Application</a>
|
||||
10. <a href="#uninstall">Uninstall the Intel® Distribution of OpenVINO™ Toolkit.</a>
|
||||
11. <a href="#Hello-World-Face-Detection-Tutorial">Use the Face Detection Tutorial</a>
|
||||
10. <a href="#Hello-World-Face-Detection-Tutorial">Use the Face Detection Tutorial</a>
|
||||
|
||||
## <a name="install-openvino"></a>Install the Intel® Distribution of OpenVINO™ Toolkit Core Components
|
||||
|
||||
@@ -116,7 +102,7 @@ toolkit installed, rename or delete these two directories:
|
||||
|
||||
**Installation Notes:**
|
||||
- Choose an installation option and run the related script as root.
|
||||
- You can use either a GUI installation wizard or command line instructions (CLI).
|
||||
- You can use either a GUI installation wizard or command-line instructions (CLI).
|
||||
- Screenshots are provided for the GUI, but not for CLI. The following information also applies to CLI and will be helpful to your installation where you will be presented with the same choices and tasks.
|
||||
|
||||
5. Choose your installation option:
|
||||
@@ -124,11 +110,11 @@ toolkit installed, rename or delete these two directories:
|
||||
```sh
|
||||
sudo ./install_GUI.sh
|
||||
```
|
||||
- **Option 2:** Command Line Instructions:
|
||||
- **Option 2:** Command-Line Instructions:
|
||||
```sh
|
||||
sudo ./install.sh
|
||||
```
|
||||
- **Option 3:** Command Line Silent Instructions:
|
||||
- **Option 3:** Command-Line Silent Instructions:
|
||||
```sh
|
||||
sudo sed -i 's/decline/accept/g' silent.cfg
|
||||
sudo ./install.sh -s silent.cfg
|
||||
@@ -142,15 +128,16 @@ messages such as the following in case you must complete additional
|
||||
steps:
|
||||

|
||||
|
||||
7. If you select the default options, the **Installation summary** GUI screen looks like this:
|
||||
7. If you select the default options, the **Installation summary** GUI screen
|
||||
looks like this:
|
||||

|
||||
**Optional:** You can choose **Customize** to change the installation directory or the components you want to install:
|
||||

|
||||
By default, the Intel® Distribution of OpenVINO™ is installed to the following directory, referred to as `<INSTALL_DIR>`:
|
||||
- For root or administrator: `/opt/intel/openvino_<version>/`
|
||||
- For regular users: `/home/<USER>/intel/openvino_<version>/`
|
||||
For simplicity, a symbolic link to the latest installation is also created: `/opt/intel/openvino_2021/`.
|
||||
- **Optional:** You can choose **Customize** to change the installation directory or the components you want to install:
|
||||

|
||||
When installed as **root** the default installation directory for the Intel Distribution of OpenVINO is
|
||||
`/opt/intel/openvino_<version>/`.<br>
|
||||
For simplicity, a symbolic link to the latest installation is also created: `/opt/intel/openvino_2021/`.
|
||||
> **NOTE**: The Intel® Media SDK component is always installed in the `/opt/intel/mediasdk` directory regardless of the OpenVINO installation path chosen.
|
||||
|
||||
8. A Complete screen indicates that the core components have been installed:
|
||||
|
||||

|
||||
@@ -336,10 +323,11 @@ cd /opt/intel/openvino_2021/install_dependencies/
|
||||
```sh
|
||||
sudo -E su
|
||||
```
|
||||
3. Install the **Intel® Graphics Compute Runtime for OpenCL™** driver components required to use the GPU plugin and write custom layers for Intel® Integrated Graphics. The drivers are not included in the package. To install them, make sure you have an internet connection and run the installation script:
|
||||
3. Install the **Intel® Graphics Compute Runtime for OpenCL™** driver components required to use the GPU plugin and write custom layers for Intel® Integrated Graphics. Run the installation script:
|
||||
```sh
|
||||
./install_NEO_OCL_driver.sh
|
||||
```
|
||||
The drivers are not included in the package and the script downloads them. Make sure you have an internet connection for this step.<br>
|
||||
The script compares the driver version on the system to the current version. If the driver version on the system is higher than or equal to the current version, the script does
|
||||
not install a new driver. If the version of the driver is lower than the current version, the script uninstalls the lower and installs the current version with your permission:
|
||||

|
||||
@@ -347,8 +335,7 @@ not install a new driver. If the version of the driver is lower than the current
|
||||
```sh
|
||||
Add OpenCL user to video group
|
||||
```
|
||||
Ignore this suggestion and continue.<br>You can also find the most recent version of the driver, installation procedure and other information in the [https://github.com/intel/compute-runtime/](https://github.com/intel/compute-runtime/) repository.
|
||||
|
||||
Ignore this suggestion and continue.
|
||||
4. **Optional**: Install header files to allow compiling new code. You can find the header files at [Khronos OpenCL™ API Headers](https://github.com/KhronosGroup/OpenCL-Headers.git).
|
||||
|
||||
## <a name="additional-NCS-steps"></a>Steps for Intel® Neural Compute Stick 2
|
||||
@@ -454,32 +441,6 @@ Congratulations, you have finished the installation of the Intel® Distribution
|
||||
|
||||
See the [OpenVINO™ Hello World Face Detection Exercise](https://github.com/intel-iot-devkit/inference-tutorials-generic).
|
||||
|
||||
## <a name="uninstall"></a>Uninstall the Intel® Distribution of OpenVINO™ Toolkit
|
||||
Choose one of the options provided below to uninstall the Intel® Distribution of OpenVINO™ Toolkit from your system.
|
||||
|
||||
### Uninstall with GUI
|
||||
1. Run the uninstallation script from `<INSTALL_DIR>/openvino_toolkit_uninstaller`:
|
||||
```sh
|
||||
sudo ./uninstall_GUI.sh
|
||||
```
|
||||
2. Follow the uninstallation wizard instructions.
|
||||
|
||||
|
||||
### Uninstall with Command Line (Interactive Mode)
|
||||
1. Run the uninstallation script from `<INSTALL_DIR>/openvino_toolkit_uninstaller`:
|
||||
```sh
|
||||
sudo ./uninstall.sh
|
||||
```
|
||||
2. Follow the instructions on your screen.
|
||||
3. When uninstallation is complete, press **Enter**.
|
||||
|
||||
### Uninstall with Command Line (Silent Mode)
|
||||
1. Run the following command from `<INSTALL_DIR>/openvino_toolkit_uninstaller`:
|
||||
```sh
|
||||
sudo ./uninstall.sh -s
|
||||
```
|
||||
2. Intel® Distribution of OpenVINO™ Toolkit is now uninstalled from your system.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
PRC developers might encounter pip installation related issues during OpenVINO™ installation. To resolve the issues, you may use one of the following options at your discretion:
|
||||
|
||||
@@ -31,19 +31,6 @@ The following components are installed by default:
|
||||
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker_README), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader_README) and others |
|
||||
| [Documentation for Pre-Trained Models ](@ref omz_models_intel_index) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/opencv/open_model_zoo) |
|
||||
|
||||
**Could Be Optionally Installed**
|
||||
|
||||
[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models on various Intel® architecture
|
||||
configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components:
|
||||
* [Model Downloader](@ref omz_tools_downloader_README)
|
||||
* [Intel® Open Model Zoo](@ref omz_models_intel_index)
|
||||
* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
|
||||
* [Post-training Optimization Tool](@ref pot_README)
|
||||
* [Accuracy Checker](@ref omz_tools_accuracy_checker_README)
|
||||
* [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md)
|
||||
|
||||
Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started.
|
||||
|
||||
## Development and Target Platform
|
||||
|
||||
The development and target platforms have the same requirements, but you can select different components during the installation, based on your intended use.
|
||||
@@ -79,7 +66,6 @@ The following steps will be covered:
|
||||
2. <a href="#set-the-environment-variables">Set the OpenVINO environment variables and (optional) Update to <code>.bash_profile</code></a>.
|
||||
4. <a href="#configure-the-model-optimizer">Configure the Model Optimizer</a>.
|
||||
5. <a href="#Run-Demos">Run verification scripts to verify installation and compile samples</a>.
|
||||
6. <a href="#uninstall">Uninstall the Intel® Distribution of OpenVINO™ Toolkit</a>.
|
||||
|
||||
## <a name="Install-Core"></a>Install the Intel® Distribution of OpenVINO™ toolkit Core Components
|
||||
|
||||
@@ -118,12 +104,13 @@ The disk image is mounted to `/Volumes/m_openvino_toolkit_p_<version>` and autom
|
||||
8. The **Installation summary** screen shows you the default component set to install:
|
||||

|
||||
|
||||
By default, the Intel® Distribution of OpenVINO™ is installed to the following directory, referred to as `<INSTALL_DIR>`:
|
||||
- If you used **root** or **administrator** privileges to run the installer, it installs the OpenVINO toolkit to `/opt/intel/openvino_<version>/`
|
||||
|
||||
* For root or administrator: `/opt/intel/openvino_<version>/`
|
||||
* For regular users: `/home/<USER>/intel/openvino_<version>/`
|
||||
For simplicity, a symbolic link to the latest installation is also created: `/opt/intel/openvino_2021/`
|
||||
|
||||
For simplicity, a symbolic link to the latest installation is also created: `/home/<user>/intel/openvino_2021/`.
|
||||
- If you used **regular user** privileges to run the installer, it installs the OpenVINO toolkit to `/home/<user>/intel/openvino_<version>/`
|
||||
|
||||
For simplicity, a symbolic link to the latest installation is also created: `/home/<user>/intel/openvino_2021/`
|
||||
|
||||
9. If needed, click **Customize** to change the installation directory or the components you want to install:
|
||||

|
||||
@@ -308,14 +295,6 @@ brew install libusb
|
||||
|
||||
Visit the Intel Distribution of OpenVINO Toolkit [Inference Tutorials for Face Detection and Car Detection Exercises](https://github.com/intel-iot-devkit/inference-tutorials-generic/tree/openvino_toolkit_r3_0)
|
||||
|
||||
## <a name="uninstall"></a>Uninstall the Intel® Distribution of OpenVINO™ Toolkit
|
||||
|
||||
Follow the steps below to uninstall the Intel® Distribution of OpenVINO™ Toolkit from your system:
|
||||
|
||||
1. From the `<INSTALL_DIR>`, locate and open `openvino_toolkit_uninstaller.app`.
|
||||
2. Follow the uninstallation wizard instructions.
|
||||
3. When uninstallation is complete, click **Finish**.
|
||||
|
||||
|
||||
## Additional Resources
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ python3 -m pip install --upgrade pip
|
||||
|
||||
Run the command below:
|
||||
```sh
|
||||
pip install openvino
|
||||
pip install openvino-python
|
||||
```
|
||||
|
||||
### Step 3. Add PATH to environment variables
|
||||
@@ -78,5 +78,5 @@ Now you are ready to develop and run your application.
|
||||
- [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
|
||||
- [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md).
|
||||
- For more information on Sample Applications, see the [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md).
|
||||
- [Intel® Distribution of OpenVINO™ toolkit PIP home page](https://pypi.org/project/openvino/)
|
||||
- [Intel® Distribution of OpenVINO™ toolkit PIP home page](https://pypi.org/project/openvino-python/)
|
||||
|
||||
|
||||
@@ -60,7 +60,7 @@ This guide provides step-by-step instructions on how to install the OpenVINO™
|
||||
|
||||
## <a name="install-package"></a>Install the OpenVINO™ Toolkit for Raspbian* OS Package
|
||||
|
||||
The guide assumes you downloaded the OpenVINO toolkit for Raspbian* OS. If you do not have a copy of the toolkit package file `l_openvino_toolkit_runtime_raspbian_p_<version>.tgz`, download the latest version from the [OpenVINO™ Toolkit packages storage](https://storage.openvinotoolkit.org/repositories/openvino/packages/) and then return to this guide to proceed with the installation.
|
||||
The guide assumes you downloaded the OpenVINO toolkit for Raspbian* OS. If you do not have a copy of the toolkit package file `l_openvino_toolkit_runtime_raspbian_p_<version>.tgz`, download the latest version from the [Intel® Open Source Technology Center](https://download.01.org/opencv/2020/openvinotoolkit/) and then return to this guide to proceed with the installation.
|
||||
|
||||
> **NOTE**: The OpenVINO toolkit for Raspbian OS is distributed without installer, so you need to perform extra steps compared to the [Intel® Distribution of OpenVINO™ toolkit for Linux* OS](installing-openvino-linux.md).
|
||||
|
||||
@@ -150,13 +150,16 @@ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-march=armv7-a" /opt/intel/o
|
||||
make -j2 object_detection_sample_ssd
|
||||
```
|
||||
|
||||
3. Download the pre-trained Face Detection model with the Model Downloader or copy it from the host machine:
|
||||
3. Download the pre-trained Face Detection model or copy it from the host machine:
|
||||
|
||||
```sh
|
||||
git clone --depth 1 https://github.com/openvinotoolkit/open_model_zoo
|
||||
cd open_model_zoo/tools/downloader
|
||||
python3 -m pip install -r requirements.in
|
||||
python3 downloader.py --name face-detection-adas-0001
|
||||
- To download the `.bin` file with weights:
|
||||
```sh
|
||||
wget --no-check-certificate https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-adas-0001/FP16/face-detection-adas-0001.bin
|
||||
```
|
||||
|
||||
- To download the `.xml` file with the network topology:
|
||||
```sh
|
||||
wget --no-check-certificate https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/models_bin/1/face-detection-adas-0001/FP16/face-detection-adas-0001.xml
|
||||
```
|
||||
|
||||
4. Run the sample, specifying the model and a path to the input image:
|
||||
@@ -173,7 +176,11 @@ Read the next topic if you want to learn more about OpenVINO workflow for Raspbe
|
||||
|
||||
If you want to use your model for inference, the model must be converted to the .bin and .xml Intermediate Representation (IR) files that are used as input by Inference Engine. OpenVINO™ toolkit support on Raspberry Pi only includes the Inference Engine module of the Intel® Distribution of OpenVINO™ toolkit. The Model Optimizer is not supported on this platform. To get the optimized models you can use one of the following options:
|
||||
|
||||
* Download public and Intel's pre-trained models from the [Open Model Zoo](https://github.com/opencv/open_model_zoo) using [Model Downloader tool](@ref omz_tools_downloader_README).
|
||||
* Download a set of ready-to-use pre-trained models for the appropriate version of OpenVINO from the Intel® Open Source Technology Center:
|
||||
|
||||
* Models for the 2020.1 release of OpenVINO are available at [https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/](https://download.01.org/opencv/2020/openvinotoolkit/2020.1/open_model_zoo/).
|
||||
* Models for the 2019 R1 release of OpenVINO are available at [https://download.01.org/opencv/2019/open_model_zoo/R1/](https://download.01.org/opencv/2019/open_model_zoo/R1/).
|
||||
* Models for the 2018 R5 release of OpenVINO are available at [https://download.01.org/openvinotoolkit/2018_R5/open_model_zoo/](https://download.01.org/openvinotoolkit/2018_R5/open_model_zoo/).
|
||||
|
||||
For more information on pre-trained models, see [Pre-Trained Models Documentation](@ref omz_models_intel_index)
|
||||
|
||||
|
||||
@@ -36,8 +36,6 @@ Your installation is complete when these are all completed:
|
||||
|
||||
- <a href="#Update-Path">Update Windows* environment variables</a>
|
||||
|
||||
7. <a href="#uninstall">Uninstall the Intel® Distribution of OpenVINO™ Toolkit</a>
|
||||
|
||||
### About the Intel® Distribution of OpenVINO™ toolkit
|
||||
|
||||
OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applications and solutions that solve a variety of tasks including emulation of human vision, automatic speech recognition, natural language processing, recommendation systems, and many others. Based on latest generations of artificial neural networks, including Convolutional Neural Networks (CNNs), recurrent and attention-based networks, the toolkit extends computer vision and non-vision workloads across Intel® hardware, maximizing performance. It accelerates applications with high-performance, AI and deep learning inference deployed from edge to cloud.
|
||||
@@ -65,19 +63,6 @@ The following components are installed by default:
|
||||
| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker_README), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader_README) and others |
|
||||
| [Documentation for Pre-Trained Models ](@ref omz_models_intel_index) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/opencv/open_model_zoo) |
|
||||
|
||||
**Could Be Optionally Installed**
|
||||
|
||||
[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench) is a platform built upon OpenVINO™ and provides a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models on various Intel® architecture
|
||||
configurations. In the DL Workbench, you can use most of OpenVINO™ toolkit components:
|
||||
* [Model Downloader](@ref omz_tools_downloader_README)
|
||||
* [Intel® Open Model Zoo](@ref omz_models_intel_index)
|
||||
* [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
|
||||
* [Post-training Optimization Tool](@ref pot_README)
|
||||
* [Accuracy Checker](@ref omz_tools_accuracy_checker_README)
|
||||
* [Benchmark Tool](../../inference-engine/samples/benchmark_app/README.md)
|
||||
|
||||
Proceed to an [easy installation from Docker](@ref workbench_docs_Workbench_DG_Install_from_Docker_Hub) to get started.
|
||||
|
||||
### System Requirements
|
||||
|
||||
**Hardware**
|
||||
@@ -329,7 +314,7 @@ Use the optional steps below if you want to:
|
||||
|
||||
> **NOTE**: These steps are required only if you want to use a GPU.
|
||||
|
||||
If your applications offload computation to **Intel® Integrated Graphics**, you must have the Intel Graphics Driver for Windows version 15.65 or higher. To see if you have this driver installed:
|
||||
If your applications offload computation to Intel® Integrated Graphics, you must have the Intel Graphics Driver for Windows version 15.65 or higher. To see if you have this driver installed:
|
||||
|
||||
1. Type **device manager** in your **Search Windows** box. The **Device Manager** opens.
|
||||
|
||||
@@ -345,8 +330,6 @@ If your applications offload computation to **Intel® Integrated Graphics**, you
|
||||
|
||||
5. If your device driver version is lower than 15.65, [download and install a higher version](http://downloadcenter.intel.com/product/80939/Graphics-Drivers).
|
||||
|
||||
To use the **Intel® Iris® Xe MAX Graphics**, install the driver manually. See the [Drivers & Software](https://downloadcenter.intel.com/download/29993/Intel-Iris-Xe-MAX-Dedicated-Graphics-Drivers?product=80939) page for driver downloads and installation instructions.
|
||||
|
||||
You are done updating your device driver and are ready to use your GPU.
|
||||
|
||||
|
||||
@@ -461,12 +444,6 @@ For information on Sample Applications, see the [Inference Engine Samples Overvi
|
||||
|
||||
Congratulations, you have finished the installation of the Intel® Distribution of OpenVINO™ toolkit for Windows*. To learn more about how the Intel® Distribution of OpenVINO™ toolkit works, the Hello World tutorial and other resources are provided below.
|
||||
|
||||
## <a name="uninstall"></a>Uninstall the Intel® Distribution of OpenVINO™ Toolkit
|
||||
Follow the steps below to uninstall the Intel® Distribution of OpenVINO™ Toolkit from your system:
|
||||
1. Choose the **Apps & Features** option from the Windows* Settings app.
|
||||
2. From the list of installed applications, select the Intel® Distribution of OpenVINO™ Toolkit and click **Uninstall**.
|
||||
3. Follow the uninstallation wizard instructions.
|
||||
4. When uninstallation is complete, click **Finish**.
|
||||
|
||||
## <a name="Summary"></a>Summary
|
||||
|
||||
@@ -486,7 +463,7 @@ To learn more about converting deep learning models, go to:
|
||||
- [Intel Distribution of OpenVINO Toolkit home page](https://software.intel.com/en-us/openvino-toolkit)
|
||||
- [Intel Distribution of OpenVINO Toolkit documentation](https://software.intel.com/en-us/openvino-toolkit/documentation/featured)
|
||||
- [OpenVINO™ Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes)
|
||||
- [Introduction to Inference Engine](../IE_DG/inference_engine_intro.md)
|
||||
- [Introduction to Inference Engine](inference_engine_intro.md)
|
||||
- [Inference Engine Developer Guide](../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md)
|
||||
- [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
|
||||
- [Inference Engine Samples Overview](../IE_DG/Samples_Overview.md)
|
||||
|
||||
@@ -9,9 +9,9 @@
|
||||
**Detailed description**: For each element from the input tensor calculates corresponding
|
||||
element in the output tensor with the following formula:
|
||||
|
||||
\f[
|
||||
HSwish(x) = x \frac{min(max(x + 3, 0), 6)}{6}
|
||||
\f]
|
||||
\f[
|
||||
HSwish(x) = x \frac{min(max(x + 3, 0), 6)}{6}
|
||||
\f]
|
||||
|
||||
The HSwish operation is introduced in the following [article](https://arxiv.org/pdf/1905.02244.pdf).
|
||||
|
||||
|
||||
@@ -26,9 +26,9 @@
|
||||
|
||||
For each element from the input tensor calculates corresponding
|
||||
element in the output tensor with the following formula:
|
||||
\f[
|
||||
Mish(x) = x*tanh(ln(1.0+e^{x}))
|
||||
\f]
|
||||
\f[
|
||||
Mish(x) = x*tanh(ln(1.0+e^{x}))
|
||||
\f]
|
||||
|
||||
**Examples**
|
||||
|
||||
|
||||
@@ -14,9 +14,9 @@
|
||||
|
||||
For each element from the input tensor calculates corresponding
|
||||
element in the output tensor with the following formula:
|
||||
\f[
|
||||
sigmoid( x ) = \frac{1}{1+e^{-x}}
|
||||
\f]
|
||||
\f[
|
||||
sigmoid( x ) = \frac{1}{1+e^{-x}}
|
||||
\f]
|
||||
|
||||
**Inputs**:
|
||||
|
||||
|
||||
@@ -9,9 +9,9 @@
|
||||
**Detailed description**: For each element from the input tensor calculates corresponding
|
||||
element in the output tensor with the following formula:
|
||||
|
||||
\f[
|
||||
SoftPlus(x) = ln(e^{x} + 1.0)
|
||||
\f]
|
||||
\f[
|
||||
SoftPlus(x) = ln(e^{x} + 1.0)
|
||||
\f]
|
||||
|
||||
**Attributes**: *SoftPlus* operation has no attributes.
|
||||
|
||||
|
||||
@@ -9,9 +9,9 @@
|
||||
**Detailed description**: For each element from the input tensor calculates corresponding
|
||||
element in the output tensor with the following formula:
|
||||
|
||||
\f[
|
||||
Swish(x) = x / (1.0 + e^{-(beta * x)})
|
||||
\f]
|
||||
\f[
|
||||
Swish(x) = x / (1.0 + e^{-(beta * x)})
|
||||
\f]
|
||||
|
||||
The Swish operation is introduced in the [article](https://arxiv.org/pdf/1710.05941.pdf).
|
||||
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
* **Type**: string
|
||||
* **Default value**: none
|
||||
* **Required**: *yes*
|
||||
* **Note**: Only 2D and 4D tensors with `axes = {0, 1}` and `axes = {2, 3}` respectively are supported for `"mode" == "linear_onnx"`.
|
||||
|
||||
* *shape_calculation_mode*
|
||||
|
||||
@@ -57,10 +56,10 @@
|
||||
|
||||
* **Description**: *antialias* is a flag that specifies whether to perform anti-aliasing.
|
||||
* **Range of values**:
|
||||
* false - do not perform anti-aliasing
|
||||
* true - perform anti-aliasing
|
||||
* False - do not perform anti-aliasing
|
||||
* True - perform anti-aliasing
|
||||
* **Type**: boolean
|
||||
* **Default value**: false
|
||||
* **Default value**: False
|
||||
* **Required**: *no*
|
||||
|
||||
* *pads_begin*
|
||||
|
||||
@@ -78,9 +78,9 @@
|
||||
|
||||
**Mathematical Formulation**
|
||||
|
||||
\f[
|
||||
output_{j} = \frac{\sum_{i = 0}^{n}x_{i}}{n}
|
||||
\f]
|
||||
\f[
|
||||
output_{j} = \frac{\sum_{i = 0}^{n}x_{i}}{n}
|
||||
\f]
|
||||
|
||||
**Example**
|
||||
|
||||
|
||||
@@ -70,9 +70,9 @@
|
||||
|
||||
**Mathematical Formulation**
|
||||
|
||||
\f[
|
||||
output_{j} = MAX\{ x_{0}, ... x_{i}\}
|
||||
\f]
|
||||
\f[
|
||||
output_{j} = MAX\{ x_{0}, ... x_{i}\}
|
||||
\f]
|
||||
|
||||
**Example**
|
||||
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2e7ed21b111f0438b9fad367c4db293c35882de05bc8bb3252a1ef5bc289ae2a
|
||||
size 33369
|
||||
@@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:356688e3fd7dd4ad6c591cda1d35d9ebd5c2b6f9787e6caa4c116717101669e5
|
||||
size 29847
|
||||
@@ -1,798 +0,0 @@
|
||||
# OpenVINO™ Security Add-on {#ovsa_get_started}
|
||||
|
||||
This guide provides instructions for people who use the OpenVINO™ Security Add-on to create, distribute, and use models that are created with the OpenVINO™ toolkit:
|
||||
|
||||
* **Model Developer**: The Model Developer interacts with the Independent Software Vendor to control the User access to models. This document shows you how to setup hardware and virtual machines to use the OpenVINO™ Security Add-on to define access control to your OpenVINO™ models and then provide the access controlled models to the users.
|
||||
* **Independent Software Vendor**: Use this guide for instructions to use the OpenVINO™ Security Add-on to validate license for access controlled models that are provided to your customers (users).
|
||||
* **User**: This document includes instructions for end users who need to access and run access controlled models through the OpenVINO™ Security Add-on.
|
||||
|
||||
In this release, one person performs the role of both the Model Developer and the Independent Software Vendor. Therefore, this document provides instructions to configure one system for these two roles and one system for the User role. This document also provides a way for the same person to play the role of the Model Developer, Independent Software Vendor, and User to let you see how the OpenVINO™ Security Add-on functions from the User perspective.
|
||||
|
||||
|
||||
## Overview
|
||||
|
||||
The OpenVINO™ Security Add-on works with the [OpenVINO™ Model Server](@ref openvino_docs_ovms) on Intel® architecture. Together, the OpenVINO™ Security Add-on and the OpenVINO™ Model Server provide a way for Model Developers and Independent Software Vendors to use secure packaging and secure model execution to enable access control to the OpenVINO™ models, and for model Users to run inference within assigned limits.
|
||||
|
||||
The OpenVINO™ Security Add-on consists of three components that run in Kernel-based Virtual Machines (KVMs). These components provide a way to run security-sensitive operations in an isolated environment. A brief description of each of the three components follows. Click each triangled line for more information about that component.
|
||||
|
||||
<details>
|
||||
<summary><strong>OpenVINO™ Security Add-on Tool</strong>: As a Model Developer or Independent Software Vendor, you use the OpenVINO™ Security Add-on Tool (`ovsatool`) to generate an access controlled model and master license. </summary>
|
||||
|
||||
- The Model Developer generates an access controlled model from the OpenVINO™ toolkit output. The access controlled model uses the model's Intermediate Representation (IR) files to create an access controlled output file archive that is distributed to Model Users. The Developer can also put the archive file in long-term storage or back it up without additional security.
|
||||
|
||||
- The Model Developer uses the OpenVINO™ Security Add-on Tool (`ovsatool`) to generate and manage cryptographic keys and related collateral for the access controlled models. Cryptographic material is only available in a virtual machine (VM) environment. The OpenVINO™ Security Add-on key management system lets the Model Developer get external Certificate Authorities to generate certificates to add to a key-store.
|
||||
|
||||
- The Model Developer generates user-specific licenses in a JSON format file for the access controlled model. The Model Developer can define global or user-specific licenses and attach licensing policies to the licenses. For example, the Model Developer can add a time limit for a model or limit the number of times a user can run a model.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>OpenVINO™ Security Add-on License Service</strong>: Use the OpenVINO™ Security Add-on License Service to verify user parameters.</summary>
|
||||
|
||||
- The Independent Software Vendor hosts the OpenVINO™ Security Add-on License Service, which responds to license validation requests when a user attempts to load an access controlled model in a model server. The licenses are registered with the OpenVINO™ Security Add-on License Service.
|
||||
|
||||
- When a user loads the model, the OpenVINO™ Security Add-on Runtime contacts the License Service to make sure the license is valid and within the parameters that the Model Developer defined with the OpenVINO™ Security Add-on Tool(`ovsatool`). The user must be able to reach the Independent Software Vendor's License Service over the Internet.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><strong>OpenVINO™ Security Add-on Runtime</strong>: Users install and use the OpenVINO™ Security Add-on Runtime on a virtual machine. </summary>
|
||||
|
||||
Users host the OpenVINO™ Security Add-on Runtime component in a virtual machine.
|
||||
|
||||
Externally from the OpenVINO™ Security Add-on, the User adds the access controlled model to the OpenVINO™ Model Server config file. The OpenVINO™ Model Server attempts to load the model in memory. At this time, the OpenVINO™ Security Add-on Runtime component validates the user's license for the access controlled model against information stored in the License Service provided by the Independent Software Vendor.
|
||||
|
||||
After the license is successfully validated, the OpenVINO™ Model Server loads the model and services the inference requests.
|
||||
|
||||
</details>
|
||||
|
||||
<br>
|
||||
**Where the OpenVINO™ Security Add-on Fits into Model Development and Deployment**
|
||||
|
||||

|
||||
|
||||
## About the Installation
|
||||
The Model Developer, Independent Software Vendor, and User each must prepare one physical hardware machine and one Kernel-based Virtual Machine (KVM). In addition, each person must prepare a Guest Virtual Machine (Guest VM) for each role that person plays.
|
||||
|
||||
For example:
|
||||
* If one person acts as both the Model Developer and as the Independent Software Vendor, that person must prepare two Guest VMs. Both Guest VMs can be on the same physical hardware (Host Machine) and under the same KVM on that Host Machine.
|
||||
* If one person acts as all three roles, that person must prepare three Guest VMs. All three Guest VMs can be on the same Host Machine and under the same KVM on that Host Machine.
|
||||
|
||||
**Purpose of Each Machine**
|
||||
|
||||
| Machine | Purpose |
|
||||
| ----------- | ----------- |
|
||||
| Host Machine | Physical hardware on which the KVM and Guest VM are set up. |
|
||||
| Kernel-based Virtual Machine (KVM) | The OpenVINO™ Security Add-on runs in this virtual machine because it provides an isolated environment for security sensitive operations. |
|
||||
| Guest VM | The Model Developer uses the Guest VM to enable access control to the completed model. <br>The Independent Software Vendor uses the Guest VM to host the License Service.<br>The User uses the Guest VM to contact the License Service and run the access controlled model. |
|
||||
|
||||
|
||||
## Prerequisites <a name="prerequisites"></a>
|
||||
|
||||
**Hardware**
|
||||
* Intel® Core™ or Xeon® processor<br>
|
||||
|
||||
**Operating system, firmware, and software**
|
||||
* Ubuntu* Linux* 18.04 on the Host Machine.<br>
|
||||
* TPM version 2.0-conformant Discrete Trusted Platform Module (dTPM) or Firmware Trusted Platform Module (fTPM)
|
||||
* Secure boot is enabled.<br>
|
||||
|
||||
**Other**
|
||||
* The Independent Software Vendor must have access to a Certificate Authority (CA) that implements the Online Certificate Status Protocol (OCSP), supporting Elliptic Curve Cryptography (ECC) certificates for deployment.
|
||||
* The example in this document uses self-signed certificates.
|
||||
|
||||
## How to Prepare a Host Machine <a name="setup-host"></a>
|
||||
|
||||
This section is for the combined role of Model Developer and Independent Software Vendor, and the separate User role.
|
||||
|
||||
### Step 1: Set up Packages on the Host Machine<a name="setup-packages"></a>
|
||||
|
||||
Begin this step on the Intel® Core™ or Xeon® processor machine that meets the <a href="#prerequisites">prerequisites</a>.
|
||||
|
||||
> **NOTE**: As an alternative to manually following steps 1 - 11, you can run the script `install_host_deps.sh` in the `Scripts/reference` directory under the OpenVINO™ Security Add-on repository. The script stops with an error message if it identifies any issues. If the script halts due to an error, correct the issue that caused the error and restart the script. The script runs for several minutes and provides progress information.
|
||||
|
||||
1. Test for Trusted Platform Module (TPM) support:
|
||||
```sh
|
||||
dmesg | grep -i TPM
|
||||
```
|
||||
The output indicates TPM availability in the kernel boot logs. Look for presence of the following devices to indicate TPM support is available:
|
||||
* `/dev/tpm0`
|
||||
* `/dev/tpmrm0`
|
||||
|
||||
If you do not see this information, your system does not meet the <a href="#prerequisites">prerequisites</a> to use the OpenVINO™ Security Add-on.
|
||||
2. Make sure hardware virtualization support is enabled in the BIOS:
|
||||
```sh
|
||||
kvm-ok
|
||||
```
|
||||
The output should show: <br>
|
||||
`INFO: /dev/kvm exists` <br>
|
||||
`KVM acceleration can be used`
|
||||
|
||||
If your output is different, modify your BIOS settings to enable hardware virtualization.
|
||||
|
||||
If the `kvm-ok` command is not present, install it:
|
||||
```sh
|
||||
sudo apt install -y cpu-checker
|
||||
```
|
||||
3. Install the Kernel-based Virtual Machine (KVM) and QEMU packages.
|
||||
```sh
|
||||
sudo apt install qemu qemu-kvm libvirt-bin bridge-utils virt-manager
|
||||
```
|
||||
4. Check the QEMU version:
|
||||
```sh
|
||||
qemu-system-x86_64 --version
|
||||
```
|
||||
If the response indicates a QEMU version lower than 2.12.0, download, compile, and install the latest QEMU version from [https://www.qemu.org/download](https://www.qemu.org/download).
|
||||
5. Build and install the [`libtpms` package](https://github.com/stefanberger/libtpms/).
|
||||
6. Build and install the [`swtpm` package](https://github.com/stefanberger/swtpm/).
|
||||
7. Add the `swtpm` package to the `$PATH` environment variable.
|
||||
8. Install the software tool [`tpm2-tss`](https://github.com/tpm2-software/tpm2-tss/releases/download/2.4.4/tpm2-tss-2.4.4.tar.gz).<br>
|
||||
Installation information is at https://github.com/tpm2-software/tpm2-tss/blob/master/INSTALL.md
|
||||
9. Install the software tool [`tpm2-abrmd`](https://github.com/tpm2-software/tpm2-abrmd/releases/download/2.3.3/tpm2-abrmd-2.3.3.tar.gz).<br>
|
||||
Installation information is at https://github.com/tpm2-software/tpm2-abrmd/blob/master/INSTALL.md
|
||||
10. Install the [`tpm2-tools`](https://github.com/tpm2-software/tpm2-tools/releases/download/4.3.0/tpm2-tools-4.3.0.tar.gz).<br>
|
||||
Installation information is at https://github.com/tpm2-software/tpm2-tools/blob/master/INSTALL.md
|
||||
11. Install the [Docker packages](https://docs.docker.com/engine/install/ubuntu/).
|
||||
> **NOTE**: Regardless of whether you used the `install_host_deps.sh` script, complete step 12 to finish setting up the packages on the Host Machine.
|
||||
12. If you are running behind a proxy, [set up a proxy for Docker](https://docs.docker.com/config/daemon/systemd/).
|
||||
|
||||
The following are installed and ready to use:
|
||||
* Kernel-based Virtual Machine (KVM)
|
||||
* QEMU
|
||||
* SW-TPM
|
||||
* HW-TPM support
|
||||
* Docker<br>
|
||||
|
||||
You're ready to configure the Host Machine for networking.
|
||||
|
||||
### Step 2: Set up Networking on the Host Machine<a name="setup-networking"></a>
|
||||
|
||||
This step is for the combined Model Developer and Independent Software Vendor roles. If the Model User VM is running on a different physical host, repeat the following steps for that host also.
|
||||
|
||||
In this step you prepare two network bridges:
|
||||
* A global IP address that a KVM can access across the Internet. This is the address that the OpenVINO™ Security Add-on Run-time software on a user's machine uses to verify they have a valid license.
|
||||
* A host-only local address to provide communication between the Guest VM and the QEMU host operating system.
|
||||
|
||||
The example in this step uses the following names. Your configuration might use different names:
|
||||
* `50-cloud-init.yaml` as an example configuration file name.
|
||||
* `eno1` as an example network interface name.
|
||||
* `br0` as an example bridge name.
|
||||
* `virbr0` as an example bridge name.
|
||||
|
||||
1. Open the network configuration file for editing. This file is in `/etc/netplan` with a name like `50-cloud-init.yaml`
|
||||
2. Look for these lines in the file:
|
||||
```sh
|
||||
network:
|
||||
ethernets:
|
||||
eno1:
|
||||
dhcp4: true
|
||||
dhcp-identifier: mac
|
||||
version: 2
|
||||
```
|
||||
3. Change the existing lines and add the `br0` network bridge. These changes enable external network access:
|
||||
```sh
|
||||
network:
|
||||
ethernets:
|
||||
eno1:
|
||||
dhcp4: false
|
||||
bridges:
|
||||
br0:
|
||||
interfaces: [eno1]
|
||||
dhcp4: yes
|
||||
dhcp-identifier: mac
|
||||
version: 2
|
||||
```
|
||||
4. Save and close the network configuration file.
|
||||
5. Run two commands to activate the updated network configuration file. If you use ssh, you might lose network connectivity when issuing these commands. If so, reconnect to the network.
|
||||
```sh
|
||||
sudo netplan generate
|
||||
```
|
||||
```sh
|
||||
sudo netplan apply
|
||||
```
|
||||
A bridge is created and an IP address is assigned to the new bridge.
|
||||
6. Verify the new bridge:
|
||||
```sh
|
||||
ip a | grep br0
|
||||
```
|
||||
The output looks similar to this and shows valid IP addresses:
|
||||
```sh
|
||||
4: br0:<br><BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000<br>inet 123.123.123.123/<mask> brd 321.321.321.321 scope global dynamic br0
|
||||
```
|
||||
7. Create a script named `br0-qemu-ifup` to bring up the `br0` interface. Add the following script contents:
|
||||
```sh
|
||||
#!/bin/sh
|
||||
nic=$1
|
||||
if [ -f /etc/default/qemu-kvm ]; then
|
||||
. /etc/default/qemu-kvm
|
||||
fi
|
||||
switch=br0
|
||||
ifconfig $nic 0.0.0.0 up
|
||||
brctl addif ${switch} $nic
|
||||
```
|
||||
8. Create a script named `br0-qemu-ifdown` to bring down the `br0` interface. Add the following script contents:
|
||||
```sh
|
||||
#!/bin/sh
|
||||
nic=$1
|
||||
if [ -f /etc/default/qemu-kvm ]; then
|
||||
. /etc/default/qemu-kvm
|
||||
fi
|
||||
switch=br0
|
||||
brctl delif $switch $nic
|
||||
ifconfig $nic 0.0.0.0 down
|
||||
```
|
||||
9. Create a script named `virbr0-qemu-ifup` to bring up the `virbr0` interface. Add the following script contents:
|
||||
```sh
|
||||
#!/bin/sh
|
||||
nic=$1
|
||||
if [ -f /etc/default/qemu-kvm ]; then
|
||||
. /etc/default/qemu-kvm
|
||||
fi
|
||||
switch=virbr0
|
||||
ifconfig $nic 0.0.0.0 up
|
||||
brctl addif ${switch} $nic
|
||||
```
|
||||
10. Create a script named `virbr0-qemu-ifdown` to bring down the `virbr0` interface. Add the following script contents:
|
||||
```sh
|
||||
#!/bin/sh
|
||||
nic=$1
|
||||
if [ -f /etc/default/qemu-kvm ]; then
|
||||
. /etc/default/qemu-kvm
|
||||
fi
|
||||
switch=virbr0
|
||||
brctl delif $switch $nic
|
||||
ifconfig $nic 0.0.0.0 down
|
||||
```
|
||||
|
||||
See the QEMU documentation for more information about the QEMU network configuration.
|
||||
|
||||
Networking is set up on the Host Machine. Continue to Step 3 to prepare a Guest VM for the combined role of Model Developer and Independent Software Vendor.
|
||||
|
||||
|
||||
### Step 3: Set Up one Guest VM for the combined roles of Model Developer and Independent Software Vendor<a name="dev-isv-vm"></a>
|
||||
|
||||
For each separate role you play, you must prepare a virtual machine, called a Guest VM. Because the Model Developer and Independent Software Vendor roles are combined in this release, these instructions guide you to set up one Guest VM, named `ovsa_isv_dev`.
|
||||
|
||||
Begin these steps on the Host Machine.
|
||||
|
||||
As an option, you can use `virsh` and the virtual machine manager to create and bring up a Guest VM. See the `libvirtd` documentation for instructions if you'd like to do this.
|
||||
|
||||
1. Download the [Ubuntu 18.04 server ISO image](https://releases.ubuntu.com/18.04/ubuntu-18.04.5-live-server-amd64.iso)
|
||||
|
||||
2. Create an empty virtual disk image to serve as the Guest VM for your role as Model Developer and Independent Software Vendor:
|
||||
```sh
|
||||
sudo qemu-img create -f qcow2 <path>/ovsa_isv_dev_vm_disk.qcow2 20G
|
||||
```
|
||||
3. Install Ubuntu 18.04 on the Guest VM. Name the Guest VM `ovsa_isv_dev`:
|
||||
```sh
|
||||
sudo qemu-system-x86_64 -m 8192 -enable-kvm \
|
||||
-cpu host \
|
||||
-drive if=virtio,file=<path-to-disk-image>/ovsa_isv_dev_vm_disk.qcow2,cache=none \
|
||||
-cdrom <path-to-iso-image>/ubuntu-18.04.5-live-server-amd64.iso \
|
||||
-device e1000,netdev=hostnet1,mac=52:54:00:d1:66:5f \
|
||||
-netdev tap,id=hostnet1,script=<path-to-scripts>/virbr0-qemu-ifup,downscript=<path-to-scripts>/virbr0-qemu-ifdown \
|
||||
-vnc :1
|
||||
```
|
||||
4. Connect a VNC client with `<host-ip-address>:1`
|
||||
5. Follow the prompts on the screen to finish installing the Guest VM. Name the VM as `ovsa_isv_dev`
|
||||
6. Shut down the Guest VM.
|
||||
7. Restart the Guest VM after removing the option of cdrom image:
|
||||
```sh
|
||||
sudo qemu-system-x86_64 -m 8192 -enable-kvm \
|
||||
-cpu host \
|
||||
-drive if=virtio,file=<path-to-disk-image>/ovsa_isv_dev_vm_disk.qcow2,cache=none \
|
||||
-device e1000,netdev=hostnet1,mac=52:54:00:d1:66:5f \
|
||||
-netdev tap,id=hostnet1,script=<path-to-scripts>/virbr0-qemu-ifup,downscript=<path-to-scripts>/virbr0-qemu-ifdown \
|
||||
-vnc :1
|
||||
```
|
||||
8. Choose ONE of these options to install additional required software:
|
||||
* **Option 1**: Use a script to install additional software
|
||||
1. Copy the script `install_guest_deps.sh` from the `Scripts/reference` directory of the OVSA repository to the Guest VM
|
||||
2. Run the script.
|
||||
3. Shut down the Guest VM.<br>
|
||||
* **Option 2** : Manually install additional software
|
||||
1. Install the software tool [`tpm2-tss`](https://github.com/tpm2-software/tpm2-tss/releases/download/2.4.4/tpm2-tss-2.4.4.tar.gz).
|
||||
Installation information is at https://github.com/tpm2-software/tpm2-tss/blob/master/INSTALL.md
|
||||
2. Install the software tool [`tpm2-abrmd`](https://github.com/tpm2-software/tpm2-abrmd/releases/download/2.3.3/tpm2-abrmd-2.3.3.tar.gz).
|
||||
Installation information is at https://github.com/tpm2-software/tpm2-abrmd/blob/master/INSTALL.md
|
||||
3. Install the [`tpm2-tools`](https://github.com/tpm2-software/tpm2-tools/releases/download/4.3.0/tpm2-tools-4.3.0.tar.gz).
|
||||
Installation information is at https://github.com/tpm2-software/tpm2-tools/blob/master/INSTALL.md
|
||||
4. Install the [Docker packages](https://docs.docker.com/engine/install/ubuntu/)
|
||||
5. Shut down the Guest VM.<br>
|
||||
9. On the host, create a directory to support the virtual TPM device. Only `root` should have read/write permission to this directory:
|
||||
```sh
|
||||
sudo mkdir -p /var/OVSA/
|
||||
sudo mkdir /var/OVSA/vtpm
|
||||
sudo mkdir /var/OVSA/vtpm/vtpm_isv_dev
|
||||
```
|
||||
**NOTE**: For steps 10 and 11, you can copy and edit the script named `start_ovsa_isv_dev_vm.sh` in the `Scripts/reference` directory in the OpenVINO™ Security Add-on repository instead of manually running the commands. If using the script, select the script with `isv` in the file name regardless of whether you are playing the role of the Model Developer or the role of the Independent Software Vendor. Edit the script to point to the correct directory locations and increment `vnc` for each Guest VM.
|
||||
10. Start the vTPM on Host:
|
||||
```sh
|
||||
swtpm socket --tpmstate dir=/var/OVSA/vtpm/vtpm_isv_dev \
|
||||
--tpm2 \
|
||||
--ctrl type=unixio,path=/var/OVSA/vtpm/vtpm_isv_dev/swtpm-sock \
|
||||
--log level=20
|
||||
```
|
||||
|
||||
11. Start the Guest VM:
|
||||
```sh
|
||||
sudo qemu-system-x86_64 \
|
||||
-cpu host \
|
||||
-enable-kvm \
|
||||
-m 8192 \
|
||||
-smp 8,sockets=1,cores=8,threads=1 \
|
||||
-device e1000,netdev=hostnet0,mac=52:54:00:d1:66:6f \
|
||||
-netdev tap,id=hostnet0,script=<path-to-scripts>/br0-qemu-ifup,downscript=<path-to-scripts>/br0-qemu-ifdown \
|
||||
-device e1000,netdev=hostnet1,mac=52:54:00:d1:66:5f \
|
||||
-netdev tap,id=hostnet1,script=<path-to-scripts>/virbr0-qemu-ifup,downscript=<path-to-scripts>/virbr0-qemu-ifdown \
|
||||
-drive if=virtio,file=<path-to-disk-image>/ovsa_isv_dev_vm_disk.qcow2,cache=none \
|
||||
-chardev socket,id=chrtpm,path=/var/OVSA/vtpm/vtpm_isv_dev/swtpm-sock \
|
||||
-tpmdev emulator,id=tpm0,chardev=chrtpm \
|
||||
-device tpm-tis,tpmdev=tpm0 \
|
||||
-vnc :1
|
||||
```
|
||||
Use the QEMU runtime options in the command to change the memory amount or CPU assigned to this Guest VM.
|
||||
|
||||
12. Use a VNC client to log on to the Guest VM at `<host-ip-address>:1`
|
||||
|
||||
### Step 4: Set Up one Guest VM for the User role
|
||||
|
||||
1. Choose ONE of these options to create a Guest VM for the User role:<br>
|
||||
**Option 1: Copy and Rename the `ovsa_isv_dev_vm_disk.qcow2` disk image**
|
||||
1. Copy the `ovsa_isv_dev_vm_disk.qcow2` disk image to a new image named `ovsa_runtime_vm_disk.qcow2`. You created the `ovsa_isv_dev_vm_disk.qcow2` disk image in <a href="#dev-isv-vm">Step 3</a>.
|
||||
2. Boot the new image.
|
||||
3. Change the hostname from `ovsa_isv_dev` to `ovsa_runtime`.
|
||||
```sh
|
||||
sudo hostnamectl set-hostname ovsa_runtime
|
||||
```
|
||||
4. Replace all instances of `ovsa_isv_dev` with `ovsa_runtime` in the new image.
|
||||
```sh
|
||||
sudo nano /etc/hosts
|
||||
```
|
||||
5. Change the `/etc/machine-id`:
|
||||
```sh
|
||||
sudo rm /etc/machine-id
|
||||
systemd-machine-id-setup
|
||||
```
|
||||
6. Shut down the Guest VM.<br><br>
|
||||
|
||||
**Option 2: Manually create the Guest VM**
|
||||
1. Create an empty virtual disk image:
|
||||
```sh
|
||||
sudo qemu-img create -f qcow2 <path>/ovsa_ovsa_runtime_vm_disk.qcow2 20G
|
||||
```
|
||||
2. Install Ubuntu 18.04 on the Guest VM. Name the Guest VM `ovsa_runtime`:
|
||||
```sh
|
||||
sudo qemu-system-x86_64 -m 8192 -enable-kvm \
|
||||
-cpu host \
|
||||
-drive if=virtio,file=<path-to-disk-image>/ovsa_ovsa_runtime_vm_disk.qcow2,cache=none \
|
||||
-cdrom <path-to-iso-image>/ubuntu-18.04.5-live-server-amd64.iso \
|
||||
-device e1000,netdev=hostnet1,mac=52:54:00:d1:66:5f \
|
||||
-netdev tap,id=hostnet1,script=<path-to-scripts>/virbr0-qemu-ifup,downscript=<path-to-scripts>/virbr0-qemu-ifdown \
|
||||
-vnc :2
|
||||
```
|
||||
3. Connect a VNC client with `<host-ip-address>:2`.
|
||||
4. Follow the prompts on the screen to finish installing the Guest VM. Name the Guest VM `ovsa_runtime`.
|
||||
5. Shut down the Guest VM.
|
||||
6. Restart the Guest VM:
|
||||
```sh
|
||||
sudo qemu-system-x86_64 -m 8192 -enable-kvm \
|
||||
-cpu host \
|
||||
-drive if=virtio,file=<path-to-disk-image>/ovsa_ovsa_runtime_vm_disk.qcow2,cache=none \
|
||||
-device e1000,netdev=hostnet1,mac=52:54:00:d1:66:5f \
|
||||
-netdev tap,id=hostnet1,script=<path-to-scripts>/virbr0-qemu-ifup,downscript=<path-to-scripts>/virbr0-qemu-ifdown \
|
||||
-vnc :2
|
||||
```
|
||||
7. Choose ONE of these options to install additional required software:
|
||||
|
||||
**Option 1: Use a script to install additional software**
|
||||
1. Copy the script `install_guest_deps.sh` from the `Scripts/reference` directory of the OVSA repository to the Guest VM
|
||||
2. Run the script.
|
||||
3. Shut down the Guest VM.<br><br>
|
||||
|
||||
**Option 2: Manually install additional software**
|
||||
1. Install the software tool [`tpm2-tss`](https://github.com/tpm2-software/tpm2-tss/releases/download/2.4.4/tpm2-tss-2.4.4.tar.gz) <br>
|
||||
Installation information is at https://github.com/tpm2-software/tpm2-tss/blob/master/INSTALL.md <br><br>
|
||||
2. Install the software tool [`tpm2-abrmd`](https://github.com/tpm2-software/tpm2-abrmd/releases/download/2.3.3/tpm2-abrmd-2.3.3.tar.gz) <br>
|
||||
Installation information is at https://github.com/tpm2-software/tpm2-abrmd/blob/master/INSTALL.md <br><br>
|
||||
3. Install the [`tpm2-tools`](https://github.com/tpm2-software/tpm2-tools/releases/download/4.3.0/tpm2-tools-4.3.0.tar.gz) <br>
|
||||
Installation information is at https://github.com/tpm2-software/tpm2-tools/blob/master/INSTALL.md <br><br>
|
||||
4. Install the [Docker packages](https://docs.docker.com/engine/install/ubuntu/)
|
||||
5. Shut down the Guest VM.<br><br>
|
||||
|
||||
2. Create a directory to support the virtual TPM device. Only `root` should have read/write permission to this directory:
|
||||
```sh
|
||||
sudo mkdir /var/OVSA/vtpm/vtpm_runtime
|
||||
```
|
||||
**NOTE**: For steps 3 and 4, you can copy and edit the script named `start_ovsa_runtime_vm.sh` in the scripts directory in the OpenVINO™ Security Add-on repository instead of manually running the commands. Edit the script to point to the correct directory locations and increment `vnc` for each Guest VM. This means that if you are creating a third Guest VM on the same Host Machine, change `-vnc :2` to `-vnc :3`
|
||||
3. Start the vTPM:
|
||||
```sh
|
||||
swtpm socket --tpmstate dir=/var/OVSA/vtpm/vtpm_runtime \
|
||||
--tpm2 \
|
||||
--ctrl type=unixio,path=/var/OVSA/vtpm/vtpm_runtime/swtpm-sock \
|
||||
--log level=20
|
||||
```
|
||||
4. Start the Guest VM in a new terminal. To do so, either copy and edit the script named `start_ovsa_runtime_vm.sh` in the scripts directory in the OpenVINO™ Security Add-on repository or manually run the command:
|
||||
```sh
|
||||
sudo qemu-system-x86_64 \
|
||||
-cpu host \
|
||||
-enable-kvm \
|
||||
-m 8192 \
|
||||
-smp 8,sockets=1,cores=8,threads=1 \
|
||||
-device e1000,netdev=hostnet2,mac=52:54:00:d1:67:6f \
|
||||
-netdev tap,id=hostnet2,script=<path-to-scripts>/br0-qemu-ifup,downscript=<path-to-scripts>/br0-qemu-ifdown \
|
||||
-device e1000,netdev=hostnet3,mac=52:54:00:d1:67:5f \
|
||||
-netdev tap,id=hostnet3,script=<path-to-scripts>/virbr0-qemu-ifup,downscript=<path-to-scripts>/virbr0-qemu-ifdown \
|
||||
-drive if=virtio,file=<path-to-disk-image>/ovsa_runtime_vm_disk.qcow2,cache=none \
|
||||
-chardev socket,id=chrtpm,path=/var/OVSA/vtpm/vtpm_runtime/swtpm-sock \
|
||||
-tpmdev emulator,id=tpm0,chardev=chrtpm \
|
||||
-device tpm-tis,tpmdev=tpm0 \
|
||||
-vnc :2
|
||||
```
|
||||
Use the QEMU runtime options in the command to change the memory amount or CPU assigned to this Guest VM.
|
||||
5. Use a VNC client to log on to the Guest VM at `<host-ip-address>:<x>` where `<x>` corresponds to the vnc number in the `start_ovsa_runtime_vm.sh` script or in step 4.
|
||||
|
||||
## How to Build and Install the OpenVINO™ Security Add-on Software <a name="install-ovsa"></a>
|
||||
|
||||
Follow the steps below to build and install the OpenVINO™ Security Add-on on the host and the different VMs.
|
||||
|
||||
### Step 1: Build the OpenVINO™ Model Server image
|
||||
Building OpenVINO™ Security Add-on depends on OpenVINO™ Model Server docker containers. Download and build OpenVINO™ Model Server first on the host.
|
||||
|
||||
1. Download the [OpenVINO™ Model Server software](https://github.com/openvinotoolkit/model_server)
|
||||
2. Build the [OpenVINO™ Model Server Docker images](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md)
|
||||
```sh
|
||||
git clone https://github.com/openvinotoolkit/model_server.git
|
||||
cd model_server
|
||||
make docker_build
|
||||
```
|
||||
### Step 2: Build the software required for all roles
|
||||
|
||||
This step is for the combined role of Model Developer and Independent Software Vendor, and the User
|
||||
|
||||
1. Download the [OpenVINO™ Security Add-on](https://github.com/openvinotoolkit/security_addon)
|
||||
|
||||
2. Go to the top-level OpenVINO™ Security Add-on source directory.
|
||||
```sh
|
||||
cd security_addon
|
||||
```
|
||||
3. Build the OpenVINO™ Security Add-on:
|
||||
```sh
|
||||
make clean all
|
||||
sudo make package
|
||||
```
|
||||
The following packages are created under the `release_files` directory:
|
||||
- `ovsa-kvm-host.tar.gz`: Host Machine file
|
||||
- `ovsa-developer.tar.gz`: For the Model Developer and the Independent Software Vendor
|
||||
- `ovsa-model-hosting.tar.gz`: For the User
|
||||
|
||||
### Step 3: Install the host software
|
||||
This step is for the combined role of Model Developer and Independent Software Vendor, and the User.
|
||||
|
||||
1. Go to the `release_files` directory:
|
||||
```sh
|
||||
cd release_files
|
||||
```
|
||||
2. Set up the path:
|
||||
```sh
|
||||
export OVSA_RELEASE_PATH=$PWD
|
||||
```
|
||||
3. Install the OpenVINO™ Security Add-on Software on the Host Machine:
|
||||
```sh
|
||||
cd $OVSA_RELEASE_PATH
|
||||
tar xvfz ovsa-kvm-host.tar.gz
|
||||
cd ovsa-kvm-host
|
||||
./install.sh
|
||||
```
|
||||
|
||||
If you are using more than one Host Machine repeat Step 3 on each.
|
||||
|
||||
### Step 4: Set up packages on the Guest VM
|
||||
This step is for the combined role of Model Developer and Independent Software Vendor. References to the Guest VM are to `ovsa_isv_dev`.
|
||||
|
||||
1. Log on to the Guest VM.
|
||||
2. Create the OpenVINO™ Security Add-on directory in the home directory
|
||||
```sh
|
||||
mkdir OVSA
|
||||
```
|
||||
3. Go to the Host Machine, outside of the Guest VM.
|
||||
4. Copy `ovsa-developer.tar.gz` from `release_files` to the Guest VM:
|
||||
```sh
|
||||
cd $OVSA_RELEASE_PATH
|
||||
scp ovsa-developer.tar.gz username@<isv-developer-vm-ip-address>:/<username-home-directory>/OVSA
|
||||
```
|
||||
5. Go to the Guest VM.
|
||||
6. Install the software to the Guest VM:
|
||||
```sh
|
||||
cd OVSA
|
||||
tar xvfz ovsa-developer.tar.gz
|
||||
cd ovsa-developer
|
||||
sudo -s
|
||||
./install.sh
|
||||
```
|
||||
7. Create a directory named `artefacts`. This directory will hold artefacts required to create licenses:
|
||||
```sh
|
||||
cd /<username-home-directory>/OVSA
|
||||
mkdir artefacts
|
||||
cd artefacts
|
||||
```
|
||||
8. Start the license server on a separate terminal.
|
||||
```sh
|
||||
sudo -s
|
||||
source /opt/ovsa/scripts/setupvars.sh
|
||||
cd /opt/ovsa/bin
|
||||
./license_server
|
||||
```
|
||||
|
||||
### Step 5: Install the OpenVINO™ Security Add-on Model Hosting Component
|
||||
|
||||
This step is for the User. References to the Guest VM are to `ovsa_runtime`.
|
||||
|
||||
The Model Hosting components install the OpenVINO™ Security Add-on Runtime Docker container based on OpenVINO™ Model Server NGINX Docker to host an access controlled model.
|
||||
|
||||
1. Log on to the Guest VM as `<user>`.
|
||||
2. Create the OpenVINO™ Security Add-on directory in the home directory
|
||||
```sh
|
||||
mkdir OVSA
|
||||
```
|
||||
3. While on the Host Machine, copy `ovsa-model-hosting.tar.gz` from `release_files` to the Guest VM:
|
||||
```sh
|
||||
cd $OVSA_RELEASE_PATH
|
||||
scp ovsa-model-hosting.tar.gz username@<runtime-vm-ip-address>:/<username-home-directory>/OVSA
|
||||
```
|
||||
4. Install the software to the Guest VM:
|
||||
```sh
|
||||
cd OVSA
|
||||
tar xvfz ovsa-model-hosting.tar.gz
|
||||
cd ovsa-model-hosting
|
||||
sudo -s
|
||||
./install.sh
|
||||
```
|
||||
5. Create a directory named `artefacts`:
|
||||
```sh
|
||||
cd /<username-home-directory>/OVSA
|
||||
mkdir artefacts
|
||||
cd artefacts
|
||||
```
|
||||
|
||||
## How to Use the OpenVINO™ Security Add-on
|
||||
|
||||
This section requires interactions between the Model Developer/Independent Software Vendor and the User. All roles must complete all applicable <a href="#setup-host">set up steps</a> and <a href="#install-ovsa">installation steps</a> before beginning this section.
|
||||
|
||||
This document uses the [face-detection-retail-0004](@ref omz_models_intel_face_detection_retail_0004_description_face_detection_retail_0004) model as an example.
|
||||
|
||||
The following figure describes the interactions between the Model Developer, Independent Software Vendor, and User.
|
||||
|
||||
**Remember**: The Model Developer/Independent Software Vendor and User roles are related to virtual machine use and one person might fill the tasks required by multiple roles. In this document the tasks of Model Developer and Independent Software Vendor are combined and use the Guest VM named `ovsa_isv_dev`. It is possible to have all roles set up on the same Host Machine.
|
||||
|
||||

|
||||
|
||||
### Model Developer Instructions
|
||||
|
||||
The Model Developer creates the model, defines access control, and creates the user license. References to the Guest VM are to `ovsa_isv_dev`. After the model is created, access control is enabled, and the license is ready, the Model Developer provides the license details to the Independent Software Vendor before sharing them with the Model User.
|
||||
|
||||
#### Step 1: Create a key store and add a certificate to it
|
||||
|
||||
1. Set up a path to the artefacts directory:
|
||||
```sh
|
||||
sudo -s
|
||||
cd /<username-home-directory>/OVSA/artefacts
|
||||
export OVSA_DEV_ARTEFACTS=$PWD
|
||||
source /opt/ovsa/scripts/setupvars.sh
|
||||
```
|
||||
2. Create files to request a certificate:<br>
|
||||
This example uses a self-signed certificate for demonstration purposes. In a production environment, use CSR files to request a CA-signed certificate.
|
||||
```sh
|
||||
cd $OVSA_DEV_ARTEFACTS
|
||||
/opt/ovsa/bin/ovsatool keygen -storekey -t ECDSA -n Intel -k isv_keystore -r isv_keystore.csr -e "/C=IN/CN=localhost"
|
||||
```
|
||||
Two files are created:
|
||||
- `isv_keystore.csr` - A Certificate Signing Request (CSR)
|
||||
- `isv_keystore.csr.crt` - A self-signed certificate
|
||||
|
||||
In a production environment, send `isv_keystore.csr` to a CA to request a CA-signed certificate.
|
||||
|
||||
3. Add the certificate to the key store
|
||||
```sh
|
||||
/opt/ovsa/bin/ovsatool keygen -storecert -c isv_keystore.csr.crt -k isv_keystore
|
||||
```
|
||||
|
||||
#### Step 2: Create the model
|
||||
|
||||
This example uses `curl` to download the `face-detection-retail-0004` model from the OpenVINO Model Zoo. If you are behind a firewall, check and set your proxy settings.
|
||||
|
||||
1. Log on to the Guest VM.
|
||||
|
||||
2. Download a model from the Model Zoo:
|
||||
```sh
|
||||
cd $OVSA_DEV_ARTEFACTS
|
||||
curl --create-dirs https://download.01.org/opencv/2021/openvinotoolkit/2021.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.xml https://download.01.org/opencv/2021/openvinotoolkit/2021.1/open_model_zoo/models_bin/1/face-detection-retail-0004/FP32/face-detection-retail-0004.bin -o model/face-detection-retail-0004.xml -o model/face-detection-retail-0004.bin
|
||||
```
|
||||
The model is downloaded to the `OVSA_DEV_ARTEFACTS/model` directory.
|
||||
|
||||
#### Step 3: Define access control for the model and create a master license for it
|
||||
|
||||
1. Go to the `artefacts` directory:
|
||||
```sh
|
||||
cd $OVSA_DEV_ARTEFACTS
|
||||
```
|
||||
2. Run the `uuidgen` command:
|
||||
```sh
|
||||
uuidgen
|
||||
```
|
||||
3. Define and enable the model access control and master license:
|
||||
```sh
|
||||
/opt/ovsa/bin/ovsatool protect -i model/face-detection-retail-0004.xml model/face-detection-retail-0004.bin -n "face detection" -d "face detection retail" -v 0004 -p face_detection_model.dat -m face_detection_model.masterlic -k isv_keystore -g <output-of-uuidgen>
|
||||
```
|
||||
The Intermediate Representation files for the `face-detection-retail-0004` model are encrypted as `face_detection_model.dat` and a master license is generated as `face_detection_model.masterlic`.
|
||||
|
||||
#### Step 4: Create a Runtime Reference TCB
|
||||
|
||||
Use the runtime reference TCB to create a customer license for the access controlled model and the specific runtime.
|
||||
|
||||
Generate the reference TCB for the runtime
|
||||
```sh
|
||||
cd $OVSA_DEV_ARTEFACTS
|
||||
source /opt/ovsa/scripts/setupvars.sh
|
||||
/opt/ovsa/bin/ovsaruntime gen-tcb-signature -n "Face Detect @ Runtime VM" -v "1.0" -f face_detect_runtime_vm.tcb -k isv_keystore
|
||||
```
|
||||
|
||||
#### Step 5: Publish the access controlled Model and Runtime Reference TCB
|
||||
The access controlled model is ready to be shared with the User and the reference TCB is ready to perform license checks.
|
||||
|
||||
#### Step 6: Receive a User Request
|
||||
1. Obtain artefacts from the User who needs access to an access controlled model:
|
||||
* Customer certificate from the customer's key store.
|
||||
* Other information that applies to your licensing practices, such as the length of time the user needs access to the model
|
||||
|
||||
2. Create a customer license configuration
|
||||
```sh
|
||||
cd $OVSA_DEV_ARTEFACTS
|
||||
/opt/ovsa/bin/ovsatool licgen -t TimeLimit -l30 -n "Time Limit License Config" -v 1.0 -u "<isv-developer-vm-ip-address>:<license_server-port>" -k isv_keystore -o 30daylicense.config
|
||||
```
|
||||
3. Create the customer license
|
||||
```sh
|
||||
cd $OVSA_DEV_ARTEFACTS
|
||||
/opt/ovsa/bin/ovsatool sale -m face_detection_model.masterlic -k isv_keystore -l 30daylicense.config -t face_detect_runtime_vm.tcb -p custkeystore.csr.crt -c face_detection_model.lic
|
||||
```
|
||||
|
||||
4. Update the license server database with the license.
|
||||
```sh
|
||||
cd /opt/ovsa/DB
|
||||
python3 ovsa_store_customer_lic_cert_db.py ovsa.db $OVSA_DEV_ARTEFACTS/face_detection_model.lic $OVSA_DEV_ARTEFACTS/custkeystore.csr.crt
|
||||
```
|
||||
|
||||
5. Provide these files to the User:
|
||||
* `face_detection_model.dat`
|
||||
* `face_detection_model.lic`
|
||||
|
||||
### User Instructions
|
||||
References to the Guest VM are to `ovsa_runtime`.
|
||||
|
||||
#### Step 1: Add a CA-Signed Certificate to a Key Store
|
||||
|
||||
1. Set up a path to the artefacts directory:
|
||||
```sh
|
||||
sudo -s
|
||||
cd /<username-home-directory>/OVSA/artefacts
|
||||
export OVSA_RUNTIME_ARTEFACTS=$PWD
|
||||
source /opt/ovsa/scripts/setupvars.sh
|
||||
```
|
||||
2. Generate a Customer key store file:
|
||||
```sh
|
||||
cd $OVSA_RUNTIME_ARTEFACTS
|
||||
/opt/ovsa/bin/ovsatool keygen -storekey -t ECDSA -n Intel -k custkeystore -r custkeystore.csr -e "/C=IN/CN=localhost"
|
||||
```
|
||||
Two files are created:
|
||||
* `custkeystore.csr` - A Certificate Signing Request (CSR)
|
||||
* `custkeystore.csr.crt` - A self-signed certificate
|
||||
|
||||
3. Send `custkeystore.csr` to the CA to request a CA-signed certificate.
|
||||
|
||||
4. Add the certificate to the key store:
|
||||
```sh
|
||||
/opt/ovsa/bin/ovsatool keygen -storecert -c custkeystore.csr.crt -k custkeystore
|
||||
```
|
||||
|
||||
#### Step 2: Request an access controlled Model from the Model Developer
|
||||
This example uses scp to share data between the ovsa_runtime and ovsa_dev Guest VMs on the same Host Machine.
|
||||
|
||||
1. Communicate your need for a model to the Model Developer. The Developer will ask you to provide the certificate from your key store and other information. This example uses the length of time the model needs to be available.
|
||||
2. Generate an artefact file to provide to the Developer:
|
||||
```sh
|
||||
cd $OVSA_RUNTIME_ARTEFACTS
|
||||
scp custkeystore.csr.crt username@<developer-vm-ip-address>:/<username-home-directory>/OVSA/artefacts
|
||||
```
|
||||
#### Step 3: Receive and load the access controlled model into the OpenVINO™ Model Server
|
||||
1. Receive the model as files named
|
||||
* `face_detection_model.dat`
|
||||
* `face_detection_model.lic`
|
||||
2. Prepare the environment:
|
||||
```sh
|
||||
cd $OVSA_RUNTIME_ARTEFACTS/..
|
||||
cp /opt/ovsa/example_runtime ovms -r
|
||||
cd ovms
|
||||
mkdir -vp model/fd/1
|
||||
```
|
||||
The `$OVSA_RUNTIME_ARTEFACTS/../ovms` directory contains scripts and a sample configuration JSON file to start the model server.
|
||||
3. Copy the artefacts from the Model Developer:
|
||||
```sh
|
||||
cd $OVSA_RUNTIME_ARTEFACTS/../ovms
|
||||
cp $OVSA_RUNTIME_ARTEFACTS/face_detection_model.dat model/fd/1/.
|
||||
cp $OVSA_RUNTIME_ARTEFACTS/face_detection_model.lic model/fd/1/.
|
||||
cp $OVSA_RUNTIME_ARTEFACTS/custkeystore model/fd/1/.
|
||||
```
|
||||
4. Rename and edit `sample.json` to include the names of the access controlled model artefacts you received from the Model Developer. The file looks like this:
|
||||
```json
|
||||
{
|
||||
"custom_loader_config_list":[
|
||||
{
|
||||
"config":{
|
||||
"loader_name":"ovsa",
|
||||
"library_path": "/ovsa-runtime/lib/libovsaruntime.so"
|
||||
}
|
||||
}
|
||||
],
|
||||
"model_config_list":[
|
||||
{
|
||||
"config":{
|
||||
"name":"protected-model",
|
||||
"base_path":"/sampleloader/model/fd",
|
||||
"custom_loader_options": {"loader_name": "ovsa", "keystore": "custkeystore", "protected_file": "face_detection_model"}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
#### Step 4: Start the NGINX Model Server
|
||||
The NGINX Model Server publishes the access controlled model.
|
||||
```sh
|
||||
./start_secure_ovsa_model_server.sh
|
||||
```
|
||||
For information about the NGINX interface, see https://github.com/openvinotoolkit/model_server/blob/main/extras/nginx-mtls-auth/README.md
|
||||
|
||||
#### Step 5: Prepare to run Inference
|
||||
|
||||
1. Log on to the Guest VM from another terminal.
|
||||
|
||||
2. Install the Python dependencies for your set up. For example:
|
||||
```sh
|
||||
sudo apt install python3-pip
|
||||
pip3 install cmake
|
||||
pip3 install scikit-build
|
||||
pip3 install opencv-python
|
||||
pip3 install futures==3.1.1
|
||||
pip3 install tensorflow-serving-api==1.14.0
|
||||
```
|
||||
3. Copy the `face_detection.py` from the example_client in `/opt/ovsa/example_client`
|
||||
```sh
|
||||
cd /<username-home-directory>/OVSA/ovms
|
||||
cp /opt/ovsa/example_client/* .
|
||||
```
|
||||
4. Copy the sample images for inferencing. An image directory is created that includes a sample image for inferencing.
|
||||
```sh
|
||||
curl --create-dirs https://raw.githubusercontent.com/openvinotoolkit/model_server/master/example_client/images/people/people1.jpeg -o images/people1.jpeg
|
||||
```
|
||||
#### Step 6: Run Inference
|
||||
|
||||
Run the `face_detection.py` script:
|
||||
```sh
|
||||
python3 face_detection.py --grpc_port 3335 --batch_size 1 --width 300 --height 300 --input_images_dir images --output_dir results --tls --server_cert server.pem --client_cert client.pem --client_key client.key --model_name protected-model
|
||||
```
|
||||
|
||||
## Summary
|
||||
You have completed these tasks:
|
||||
- Set up one or more computers (Host Machines) with one KVM per machine and one or more virtual machines (Guest VMs) on the Host Machines
|
||||
- Installed the OpenVINO™ Security Add-on
|
||||
- Used the OpenVINO™ Model Server to work with OpenVINO™ Security Add-on
|
||||
- As a Model Developer or Independent Software Vendor, you enabled access control for a model and prepared a license for it.
|
||||
- As a Model Developer or Independent Software Vendor, you prepared and ran a License Server and used the License Server to verify a User had a valid license to use an access controlled model.
|
||||
- As a User, you provided information to a Model Developer or Independent Software Vendor to get an access controlled model and the license for the model.
|
||||
- As a User, you set up and launched a Host Server on which you can run licensed and access controlled models.
|
||||
- As a User, you loaded an access controlled model, validated the license for the model, and used the model to run inference.
|
||||
|
||||
## References
|
||||
Use these links for more information:
|
||||
- [OpenVINO™ toolkit](https://software.intel.com/en-us/openvino-toolkit)
|
||||
- [OpenVINO Model Server Quick Start Guide](https://github.com/openvinotoolkit/model_server/blob/main/docs/ovms_quickstart.md)
|
||||
- [Model repository](https://github.com/openvinotoolkit/model_server/blob/main/docs/models_repository.md)
|
||||
@@ -1,109 +0,0 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <iostream>
|
||||
#include <inference_engine.hpp>
|
||||
|
||||
using namespace InferenceEngine;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
// --------------------------- 1. Load inference engine -------------------------------------
|
||||
std::cout << "Loading Inference Engine" << std::endl;
|
||||
Core ie;
|
||||
|
||||
// 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin files) or ONNX (.onnx file) format
|
||||
std::cout << "Loading network files" << std::endl;
|
||||
CNNNetwork network;
|
||||
network = ie.ReadNetwork(std::string("c:\\work\\git\\github_dldt3\\openvino\\model-optimizer\\summator.xml"));
|
||||
network.setBatchSize(1);
|
||||
|
||||
// 3. Load network to CPU
|
||||
ExecutableNetwork executableNet = ie.LoadNetwork(network, "CPU");
|
||||
// 4. Create Infer Request
|
||||
InferRequest inferRequest = executableNet.CreateInferRequest();
|
||||
|
||||
// 5. Prepare inputs
|
||||
ConstInputsDataMap cInputInfo = executableNet.GetInputsInfo();
|
||||
std::vector<Blob::Ptr> ptrInputBlobs;
|
||||
for (const auto& input : cInputInfo) {
|
||||
ptrInputBlobs.push_back(inferRequest.GetBlob(input.first));
|
||||
}
|
||||
InputsDataMap inputInfo;
|
||||
inputInfo = network.getInputsInfo();
|
||||
for (auto &item : inputInfo) {
|
||||
Precision inputPrecision = Precision::FP32;
|
||||
item.second->setPrecision(inputPrecision);
|
||||
}
|
||||
|
||||
// 6. Prepare outputs
|
||||
std::vector<Blob::Ptr> ptrOutputBlobs;
|
||||
ConstOutputsDataMap cOutputInfo = executableNet.GetOutputsInfo();
|
||||
for (const auto& output : cOutputInfo) {
|
||||
ptrOutputBlobs.push_back(inferRequest.GetBlob(output.first));
|
||||
}
|
||||
|
||||
// 7. Initialize memory state before starting
|
||||
for (auto &&state : inferRequest.QueryState()) {
|
||||
state.Reset();
|
||||
}
|
||||
|
||||
//! [part1]
|
||||
// input data
|
||||
std::vector<float> data = { 1,2,3,4,5,6};
|
||||
// infer the first utterance
|
||||
for (size_t next_input = 0; next_input < data.size()/2; next_input++) {
|
||||
MemoryBlob::Ptr minput = as<MemoryBlob>(ptrInputBlobs[0]);
|
||||
auto minputHolder = minput->wmap();
|
||||
|
||||
std::memcpy(minputHolder.as<void *>(),
|
||||
&data[next_input],
|
||||
sizeof(float));
|
||||
|
||||
inferRequest.Infer();
|
||||
// check states
|
||||
auto states = inferRequest.QueryState();
|
||||
auto mstate = as<MemoryBlob>(states[0].GetState());
|
||||
auto state_buf = mstate->rmap();
|
||||
float * state =state_buf.as<float*>();
|
||||
std::cout << state[0] << "\n";
|
||||
}
|
||||
|
||||
// resetting state between utterances
|
||||
std::cout<<"Reset state\n";
|
||||
for (auto &&state : inferRequest.QueryState()) {
|
||||
state.Reset();
|
||||
}
|
||||
|
||||
// infer the second utterance
|
||||
for (size_t next_input = data.size()/2; next_input < data.size(); next_input++) {
|
||||
MemoryBlob::Ptr minput = as<MemoryBlob>(ptrInputBlobs[0]);
|
||||
auto minputHolder = minput->wmap();
|
||||
|
||||
std::memcpy(minputHolder.as<void *>(),
|
||||
&data[next_input],
|
||||
sizeof(float));
|
||||
|
||||
inferRequest.Infer();
|
||||
// check states
|
||||
auto states = inferRequest.QueryState();
|
||||
auto mstate = as<MemoryBlob>(states[0].GetState());
|
||||
auto state_buf = mstate->rmap();
|
||||
float * state =state_buf.as<float*>();
|
||||
std::cout << state[0] << "\n";
|
||||
}
|
||||
//! [part1]
|
||||
}
|
||||
catch (const std::exception &error) {
|
||||
std::cerr << error.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
catch (...) {
|
||||
std::cerr << "Unknown/internal exception happened" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::cerr << "Execution successful" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user