Merge remote-tracking branch 'upstream/master' into debian-packages
@@ -113,7 +113,7 @@ jobs:
 # For opencv-python: python3-setuptools and pip upgrade
 python3 -m pip install --upgrade pip
 python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt
-python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/wheel/requirements-dev.txt
+python3 -m pip install -r $(REPO_DIR)/src/bindings/python/wheel/requirements-dev.txt
 # For running Python API tests
 python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt
 # For running Paddle frontend unit tests
@@ -208,7 +208,7 @@ jobs:
 
 - script: |
 set -e
-mkdir $(INSTALL_DIR)/opencv/
+mkdir -p $(INSTALL_DIR)/opencv/
 cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake
 cp -R $(REPO_DIR)/temp/opencv_4.5.2_ubuntu20/opencv/* $(INSTALL_DIR)/opencv/
 workingDirectory: $(BUILD_DIR)
@@ -119,7 +119,7 @@ jobs:
 
 - script: |
 set -e
-mkdir $(INSTALL_DIR)/opencv/
+mkdir -p $(INSTALL_DIR)/opencv/
 cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake
 cp -R $(REPO_DIR)/temp/opencv_4.5.2_osx/opencv/* $(INSTALL_DIR)/opencv/
 workingDirectory: $(BUILD_DIR)
.github/workflows/py_checks.yml
@@ -54,13 +54,13 @@ jobs:
 path: src_diff.diff
 - name: Run Flake on wheel
 run: python -m flake8 ./ --config=../setup.cfg
-working-directory: inference-engine/ie_bridges/python/wheel
+working-directory: src/bindings/python/wheel
 - name: Create code style diff for wheel
 if: failure()
 run: |
 python -m black -l 160 -S ./
 git diff > wheel_diff.diff
-working-directory: inference-engine/ie_bridges/python/wheel
+working-directory: src/bindings/python/wheel
 - uses: actions/upload-artifact@v2
 if: failure()
 with:
.gitmodules
@@ -54,7 +54,7 @@
 path = cmake/developer_package/ncc_naming_style/ncc
 url = https://github.com/nithinn/ncc.git
 [submodule "thirdparty/onednn_gpu"]
-path = thirdparty/onednn_gpu
+path = src/plugins/intel_gpu/thirdparty/onednn_gpu
 url = https://github.com/oneapi-src/oneDNN.git
 [submodule "tools/pot/thirdparty/open_model_zoo"]
 path = thirdparty/open_model_zoo
@@ -45,8 +45,7 @@ Jenkinsfile @openvinotoolkit/openvino-admins
 # IE GPU:
 /src/inference/include/ie/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
 /src/inference/include/ie/cldnn/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
-/src/inference/include/openvino/runtime/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
+/src/inference/include/openvino/runtime/intel_gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
-/inference-engine/thirdparty/clDNN/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
 /src/plugins/intel_gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
 
 # IE VPU:
@@ -276,8 +276,8 @@ if(ENABLE_INTEL_GNA)
 GNA_LIB_DIR
 libGNA_INCLUDE_DIRS
 libGNA_LIBRARIES_BASE_PATH)
-set(GNA_VERSION "03.00.00.1455")
+set(GNA_VERSION "03.00.00.1455.0")
-set(GNA_HASH "8ac1af18eb32777b00193f4f8c252ee4f8bd64a9069138b4a5aaeebd82ead464")
+set(GNA_HASH "99891696269d8fa10116c96e6b7bda4362736881f0df8df8b56c751ee18e5820")
 
 set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include)
 if(WIN32)
@@ -37,7 +37,7 @@ The implementation `CompileNetwork` is fully device-specific.
 
 The function accepts a const shared pointer to `ngraph::Function` object and performs the following steps:
 
-1. Applies ngraph passes using `TransformNetwork` function, which defines plugin-specific conversion pipeline.
+1. Applies ngraph passes using `TransformNetwork` function, which defines plugin-specific conversion pipeline. To support low precision inference, the pipeline can include Low Precision Transformations. These transformations are usually hardware specific. You can find how to use and configure Low Precision Transformations in the [Low Precision Transformations](@ref openvino_docs_IE_DG_lpt) guide.
 2. Maps the transformed graph to a backend specific graph representation (for example, to MKLDNN graph for Intel CPU).
 3. Allocates and fills memory for graph weights, backend specific memory handles and so on.
 
@@ -52,6 +52,7 @@ Detailed guides
 * [Build](@ref openvino_docs_ie_plugin_dg_plugin_build) a plugin library using CMake\*
 * Plugin and its components [testing](@ref openvino_docs_ie_plugin_dg_plugin_testing)
 * [Quantized networks](@ref openvino_docs_ie_plugin_dg_quantized_networks)
+* [Low precision transformations](@ref openvino_docs_IE_DG_lpt) guide
 * [Writing nGraph transformations](@ref ngraph_transformation) guide
 
 API References
@@ -4,7 +4,78 @@
 <tab type="usergroup" url="index.html" visibile="yes" title="GUIDE">
 <tab type="usergroup" url="index.html" title="Developer Guide for Inference Engine Plugin Library">
 <tab type="user" url="@ref plugin" visibile="yes" title="Implement Plugin Functionality"/>
-<tab type="user" url="@ref executable_network" visibile="yes" title="Implement Executable Network Functionality"/>
+<tab type="user" url="@ref executable_network" visibile="yes" title="Implement Executable Network Functionality">
+<tab type="usergroup" title="Low Precision Transformations" url="@ref openvino_docs_IE_DG_lpt">
+<tab type="user" title="Attributes" url="@ref openvino_docs_IE_DG_lpt_attributes">
+<tab type="user" title="AvgPoolPrecisionPreserved" url="@ref openvino_docs_IE_DG_lpt_AvgPoolPrecisionPreserved"/>
+<tab type="user" title="IntervalsAlignment" url="@ref openvino_docs_IE_DG_lpt_IntervalsAlignment"/>
+<tab type="user" title="PerTensorQuantization" url="@ref openvino_docs_IE_DG_lpt_PerTensorQuantization"/>
+<tab type="user" title="PrecisionPreserved" url="@ref openvino_docs_IE_DG_lpt_PrecisionPreserved"/>
+<tab type="user" title="Precisions" url="@ref openvino_docs_IE_DG_lpt_Precisions"/>
+<tab type="user" title="QuantizationAlignment" url="@ref openvino_docs_IE_DG_lpt_QuantizationAlignment"/>
+</tab>
+<tab type="user" title="Step 1. Prerequisites transformations" url="@ref openvino_docs_IE_DG_lpt_step1_prerequisites">
+<tab type="user" title="LinOpSequenceFusion" url="@ref openvino_docs_IE_DG_lpt_LinOpSequenceFusion"/>
+<tab type="user" title="PullReshapeThroughDequantization" url="@ref openvino_docs_IE_DG_lpt_PullReshapeThroughDequantization"/>
+<tab type="user" title="PullTransposeThroughDequantization" url="@ref openvino_docs_IE_DG_lpt_PullTransposeThroughDequantization"/>
+</tab>
+<tab type="user" title="Step 2. Markup transformations" url="@ref openvino_docs_IE_DG_lpt_step2_markup">
+<tab type="user" title="AlignQuantizationIntervals" url="@ref openvino_docs_IE_DG_lpt_AlignQuantizationIntervals"/>
+<tab type="user" title="AlignQuantizationParameters" url="@ref openvino_docs_IE_DG_lpt_AlignQuantizationParameters"/>
+<tab type="user" title="CreateAttribute" url="@ref openvino_docs_IE_DG_lpt_CreateAttribute"/>
+<tab type="user" title="CreatePrecisionsDependentAttribute" url="@ref openvino_docs_IE_DG_lpt_CreatePrecisionsDependentAttribute"/>
+<tab type="user" title="MarkupAvgPoolPrecisionPreserved" url="@ref openvino_docs_IE_DG_lpt_MarkupAvgPoolPrecisionPreserved"/>
+<tab type="user" title="MarkupCanBeQuantized" url="@ref openvino_docs_IE_DG_lpt_MarkupCanBeQuantized"/>
+<tab type="user" title="MarkupPerTensorQuantization" url="@ref openvino_docs_IE_DG_lpt_MarkupPerTensorQuantization"/>
+<tab type="user" title="MarkupPrecisions" url="@ref openvino_docs_IE_DG_lpt_MarkupPrecisions"/>
+<tab type="user" title="PropagatePrecisions" url="@ref openvino_docs_IE_DG_lpt_PropagatePrecisions"/>
+<tab type="user" title="PropagateThroughPrecisionPreserved" url="@ref openvino_docs_IE_DG_lpt_PropagateThroughPrecisionPreserved"/>
+<tab type="user" title="PropagateToInput" url="@ref openvino_docs_IE_DG_lpt_PropagateToInput"/>
+<tab type="user" title="UpdateSharedPrecisionPreserved" url="@ref openvino_docs_IE_DG_lpt_UpdateSharedPrecisionPreserved"/>
+</tab>
+<tab type="user" title="Step 3. Main transformations" url="@ref openvino_docs_IE_DG_lpt_step3_main">
+<tab type="user" title="AddTransformation" url="@ref openvino_docs_IE_DG_lpt_AddTransformation"/>
+<tab type="user" title="AvgPoolTransformation" url="@ref openvino_docs_IE_DG_lpt_AvgPoolTransformation"/>
+<tab type="user" title="ClampTransformation" url="@ref openvino_docs_IE_DG_lpt_ClampTransformation"/>
+<tab type="user" title="ConcatTransformation" url="@ref openvino_docs_IE_DG_lpt_ConcatTransformation"/>
+<tab type="user" title="ConvolutionTransformation" url="@ref openvino_docs_IE_DG_lpt_ConvolutionTransformation"/>
+<tab type="user" title="ConvolutionBackpropDataTransformation" url="@ref openvino_docs_IE_DG_lpt_ConvolutionBackpropDataTransformation"/>
+<tab type="user" title="DepthToSpaceTransformation" url="@ref openvino_docs_IE_DG_lpt_DepthToSpaceTransformation"/>
+<tab type="user" title="FakeQuantizeDecompositionTransformation" url="@ref openvino_docs_IE_DG_lpt_FakeQuantizeDecompositionTransformation"/>
+<tab type="user" title="FakeQuantizeTransformation" url="@ref openvino_docs_IE_DG_lpt_FakeQuantizeTransformation"/>
+<tab type="user" title="InterpolateTransformation" url="@ref openvino_docs_IE_DG_lpt_InterpolateTransformation"/>
+<tab type="user" title="GroupConvolutionTransformation" url="@ref openvino_docs_IE_DG_lpt_GroupConvolutionTransformation"/>
+<tab type="user" title="MatMulTransformation" url="@ref openvino_docs_IE_DG_lpt_MatMulTransformation"/>
+<tab type="user" title="MaxPoolTransformation" url="@ref openvino_docs_IE_DG_lpt_MaxPoolTransformation"/>
+<tab type="user" title="MultiplyTransformation" url="@ref openvino_docs_IE_DG_lpt_MultiplyTransformation"/>
+<tab type="user" title="MVNTransformation" url="@ref openvino_docs_IE_DG_lpt_MVNTransformation"/>
+<tab type="user" title="NormalizeL2Transformation" url="@ref openvino_docs_IE_DG_lpt_NormalizeL2Transformation"/>
+<tab type="user" title="PadTransformation" url="@ref openvino_docs_IE_DG_lpt_PadTransformation"/>
+<tab type="user" title="PReluTransformation" url="@ref openvino_docs_IE_DG_lpt_PReluTransformation"/>
+<tab type="user" title="ReduceMaxTransformation" url="@ref openvino_docs_IE_DG_lpt_ReduceMaxTransformation"/>
+<tab type="user" title="ReduceMeanTransformation" url="@ref openvino_docs_IE_DG_lpt_ReduceMeanTransformation"/>
+<tab type="user" title="ReduceMinTransformation" url="@ref openvino_docs_IE_DG_lpt_ReduceMinTransformation"/>
+<tab type="user" title="ReduceSumTransformation" url="@ref openvino_docs_IE_DG_lpt_ReduceSumTransformation"/>
+<tab type="user" title="ReluTransformation" url="@ref openvino_docs_IE_DG_lpt_ReluTransformation"/>
+<tab type="user" title="ReshapeTransformation" url="@ref openvino_docs_IE_DG_lpt_ReshapeTransformation"/>
+<tab type="user" title="SqueezeTransformation" url="@ref openvino_docs_IE_DG_lpt_SqueezeTransformation"/>
+<tab type="user" title="ShuffleChannelsTransformation" url="@ref openvino_docs_IE_DG_lpt_ShuffleChannelsTransformation"/>
+<tab type="user" title="SplitTransformation" url="@ref openvino_docs_IE_DG_lpt_SplitTransformation"/>
+<tab type="user" title="StridedSliceTransformation" url="@ref openvino_docs_IE_DG_lpt_StridedSliceTransformation"/>
+<tab type="user" title="TransposeTransformation" url="@ref openvino_docs_IE_DG_lpt_TransposeTransformation"/>
+<tab type="user" title="UnsqueezeTransformation" url="@ref openvino_docs_IE_DG_lpt_UnsqueezeTransformation"/>
+<tab type="user" title="VariadicSplitTransformation" url="@ref openvino_docs_IE_DG_lpt_VariadicSplitTransformation"/>
+</tab>
+<tab type="user" title="Step 4. Cleanup transformations" url="@ref openvino_docs_IE_DG_lpt_step4_cleanup">
+<tab type="user" title="FoldConvertTransformation" url="@ref openvino_docs_IE_DG_lpt_FoldConvertTransformation"/>
+<tab type="user" title="FoldFakeQuantizeTransformation" url="@ref openvino_docs_IE_DG_lpt_FoldFakeQuantizeTransformation"/>
+<tab type="user" title="FuseConvertTransformation" url="@ref openvino_docs_IE_DG_lpt_FuseConvertTransformation"/>
+<tab type="user" title="FuseMultiplyToFakeQuantizeTransformation" url="@ref openvino_docs_IE_DG_lpt_FuseMultiplyToFakeQuantizeTransformation"/>
+<tab type="user" title="FuseSubtractToFakeQuantizeTransformation" url="@ref openvino_docs_IE_DG_lpt_FuseSubtractToFakeQuantizeTransformation"/>
+<tab type="user" title="MultiplyToGroupConvolutionTransformation" url="@ref openvino_docs_IE_DG_lpt_MultiplyToGroupConvolutionTransformation"/>
+</tab>
+</tab>
+</tab>
 <tab type="user" url="@ref infer_request" visibile="yes" title="Implement Synchronous Inference Request"/>
 <tab type="user" url="@ref async_infer_request" visibile="yes" title="Implement Asynchronous Inference Request"/>
 </tab>
@@ -0,0 +1,17 @@
+# Plugin Transformation Pipeline {#openvino_docs_IE_DG_plugin_transformation_pipeline}
+
+@sphinxdirective
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Executable Network
+   :hidden:
+
+   Low Precision Transformations <openvino_docs_IE_DG_lpt>
+
+@endsphinxdirective
+
+A typical plugin transformation pipeline includes the following steps:
+1. Common transformations
+2. [Low precision transformations](@ref openvino_docs_IE_DG_lpt)
+3. Plugin-specific transformations
@@ -0,0 +1,11 @@
+# AvgPoolPrecisionPreserved attribute {#openvino_docs_IE_DG_lpt_AvgPoolPrecisionPreserved}
+
+The ngraph::AvgPoolPrecisionPreservedAttribute class represents the `AvgPoolPrecisionPreserved` attribute.
+
+It is a utility attribute, used only during `AvgPool` operation handling, to define the precision preserved property.
+
+| Property name | Values          |
+|---------------|-----------------|
+| Required      | Yes             |
+| Defined       | Operation       |
+| Properties    | value (boolean) |
@@ -0,0 +1,11 @@
+# IntervalsAlignment attribute {#openvino_docs_IE_DG_lpt_IntervalsAlignment}
+
+The ngraph::IntervalsAlignmentAttribute class represents the `IntervalsAlignment` attribute.
+
+The attribute defines a subgraph with the same quantization intervals alignment; the `FakeQuantize` operations are included. The attribute is used by quantization operations.
+
+| Property name | Values                                                                     |
+|---------------|----------------------------------------------------------------------------|
+| Required      | Yes                                                                        |
+| Defined       | Operation                                                                  |
+| Properties    | combined interval, minimal interval, minimal levels, preferable precisions |
@@ -0,0 +1,11 @@
+# PerTensorQuantization attribute {#openvino_docs_IE_DG_lpt_PerTensorQuantization}
+
+The ngraph::PerTensorQuantizationAttribute class represents the `PerTensorQuantization` attribute.
+
+The attribute defines whether the operation input port requires per-tensor quantization.
+
+| Property name | Values                 |
+|---------------|------------------------|
+| Required      | Yes                    |
+| Defined       | Operation, input ports |
+| Properties    |                        |
@@ -0,0 +1,11 @@
+# PrecisionPreserved attribute {#openvino_docs_IE_DG_lpt_PrecisionPreserved}
+
+The ngraph::PrecisionPreservedAttribute class represents the `PrecisionPreserved` attribute.
+
+The attribute defines a precision preserved operation. If the attribute is absent, the operation is not precision preserved.
+
+| Property name | Values          |
+|---------------|-----------------|
+| Required      | Yes             |
+| Defined       | Operation       |
+| Properties    | value (boolean) |
@@ -0,0 +1,11 @@
+# Precisions attribute {#openvino_docs_IE_DG_lpt_Precisions}
+
+The ngraph::PrecisionsAttribute class represents the `Precisions` attribute.
+
+The attribute defines the precision that is required for an input/output port or an operation.
+
+| Property name | Values                             |
+|---------------|------------------------------------|
+| Required      | Yes                                |
+| Defined       | Operation, input port, output port |
+| Properties    | precisions                         |
@@ -0,0 +1,11 @@
+# QuantizationAlignment attribute {#openvino_docs_IE_DG_lpt_QuantizationAlignment}
+
+The ngraph::QuantizationAlignmentAttribute class represents the `QuantizationAlignment` attribute.
+
+The attribute defines a subgraph with the same quantization alignment; the `FakeQuantize` operations are not included. The attribute is used by quantization operations.
+
+| Property name | Values          |
+|---------------|-----------------|
+| Required      | Yes             |
+| Defined       | Operation       |
+| Properties    | value (boolean) |
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ee64e2c942110b8dbbc7cb3d200ed7061da6a12a55c0f379378e31db9ae2180
+size 366513
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1d9a68912b2dde17c731ed31b090077e6812a84231544ce3d212c0e02b13dfb
+size 204085
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79b2fd14f9ff7655e4a5abe7e71748e153a095fe1f5eb07c168f53cb12fbb406
+size 216703
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d3e9a9eddfdcd50eedb035c500848b982b9317ba23f28809a831bbe66300bec
+size 167226
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec31aa62c0e1da3caf1531f2d92270f321857aca3044445ec242f33ee224f91b
+size 297353
@@ -0,0 +1,319 @@
+# OpenVINO™ Low Precision Transformations {#openvino_docs_IE_DG_lpt}
+
+@sphinxdirective
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Low Precision Transformations
+   :hidden:
+
+   Low Precision Transformations <openvino_docs_IE_DG_lpt>
+
+   Attributes <openvino_docs_IE_DG_lpt_attributes>
+   Step 1. Prerequisites transformations <openvino_docs_IE_DG_lpt_step1_prerequisites>
+   Step 2. Markup transformations <openvino_docs_IE_DG_lpt_step2_markup>
+   Step 3. Main transformations <openvino_docs_IE_DG_lpt_step3_main>
+   Step 4. Cleanup transformations <openvino_docs_IE_DG_lpt_step4_cleanup>
+
+@endsphinxdirective
+
+## Introduction
+Low precision transformations (known as LPT) are a set of nGraph transformations combined in one library. The library is a mandatory part of OpenVINO that allows inferring quantized models in low precision with maximum performance on Intel CPU, GPU and ARM platforms. The library includes more than 45 transformations and supports more than 30 operations. Some transformations are mandatory, while others are optional and developed for specific devices.
+
+The goal of Low Precision Transformations (LPT) is to transform a quantized model from its original precision (FP16 or FP32) to a low precision (INT8: `signed int8` or `unsigned int8`), so that it is prepared for low precision inference in an OpenVINO™ plugin. This is achieved by two main principles:
+1. `FakeQuantize` operation decomposition into two parts:
+   - part #1: a quantize operation - a new `FakeQuantize` operation with output quantization intervals in the low precision range (signed int8: [-128, 127] or [-127, 127], unsigned int8: [0, 255] or [0, 256]) and with low precision output (`signed int8` or `unsigned int8`),
+   - part #2: dequantization operations with low precision input and original precision output.
+2. Propagation of the dequantization operations through the original model's operations. This is done to avoid dequantization operations immediately before the original model operations; as a result, the quantize operations with low precision output remain in front of the original model operations.
+
+As a result, operation input tensor precisions are changed from the original precision to low precision, and operations can be inferred by the OpenVINO™ plugin in low precision.
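
The decomposition preserves the computed values. The standalone sketch below illustrates the arithmetic for a single scalar with made-up intervals (the interval [0, 2.55] quantized to `unsigned int8` with 256 levels); it only demonstrates the idea and is not code from the LPT library.

```cpp
// FakeQuantize vs. its decomposition into quantize + dequantize for one scalar.
// Intervals and the sample value are made up; clamping to the input interval is
// omitted because the sample value lies inside it.
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
    const float in_low = 0.f, in_high = 2.55f;    // FakeQuantize input interval
    const float out_low = 0.f, out_high = 2.55f;  // FakeQuantize output interval
    const int levels = 256;                       // 256 levels -> unsigned int8
    const float x = 1.234f;                       // sample activation value

    const float in_scale = (in_high - in_low) / (levels - 1);
    const float out_scale = (out_high - out_low) / (levels - 1);

    // Original FakeQuantize: snap x to one of `levels` values in [out_low, out_high].
    const float fq = std::round((x - in_low) / in_scale) * out_scale + out_low;

    // Decomposed form: quantize to u8, then dequantize (Convert -> Subtract -> Multiply).
    const uint8_t q = static_cast<uint8_t>(std::round((x - in_low) / in_scale));
    const float zero_point = -out_low / out_scale;                      // Subtract constant
    const float dq = (static_cast<float>(q) - zero_point) * out_scale;  // Multiply constant

    std::printf("FakeQuantize: %f, quantize + dequantize: %f\n", fq, dq);
    return 0;
}
```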
+
+For a more detailed description of how to quantize a model, see the [Low precision tools](#low-precision-tools) section below. For more information about model quantization, refer to the **Brief History of Lower Precision in Deep Learning** section in [this whitepaper](https://software.intel.com/en-us/articles/lower-numerical-precision-deep-learning-inference-and-training).
+
+## Input model requirements
+
+LPT transformations propagate dequantization operations through the following operations:
+* [Add-1](@ref openvino_docs_ops_arithmetic_Add_1)
+* [AvgPool-1](@ref openvino_docs_ops_pooling_AvgPool_1)
+* [Clamp-1](@ref openvino_docs_ops_activation_Clamp_1)
+* [Concat-1](@ref openvino_docs_ops_movement_Concat_1)
+* [Convolution-1](@ref openvino_docs_ops_convolution_Convolution_1)
+* [ConvolutionBackpropData-1](@ref openvino_docs_ops_convolution_ConvolutionBackpropData_1)
+* [DepthToSpace-1](@ref openvino_docs_ops_movement_DepthToSpace_1)
+* [FakeQuantize-1](@ref openvino_docs_ops_quantization_FakeQuantize_1)
+* [GroupConvolution-1](@ref openvino_docs_ops_convolution_GroupConvolution_1)
+* [Interpolate-1](@ref openvino_docs_ops_image_Interpolate_1)
+* [Interpolate-4](@ref openvino_docs_ops_image_Interpolate_4)
+* [MatMul-1](@ref openvino_docs_ops_matrix_MatMul_1)
+* [MaxPool-1](@ref openvino_docs_ops_pooling_MaxPool_1)
+* [Multiply-1](@ref openvino_docs_ops_arithmetic_Multiply_1)
+* [MVN-1](@ref openvino_docs_ops_normalization_MVN_1)
+* [NormalizeL2-1](@ref openvino_docs_ops_normalization_NormalizeL2_1)
+* [PRelu-1](@ref openvino_docs_ops_activation_PReLU_1)
+* [ReduceMax-1](@ref openvino_docs_ops_reduction_ReduceMax_1)
+* [ReduceMean-1](@ref openvino_docs_ops_reduction_ReduceMean_1)
+* [ReduceMin-1](@ref openvino_docs_ops_reduction_ReduceMin_1)
+* [ReduceSum-1](@ref openvino_docs_ops_reduction_ReduceSum_1)
+* [Relu-1](@ref openvino_docs_ops_activation_ReLU_1)
+* [Reshape-1](@ref openvino_docs_ops_shape_Reshape_1)
+* [Split-1](@ref openvino_docs_ops_movement_Split_1)
+* [Squeeze-1](@ref openvino_docs_ops_shape_Reshape_1)
+* [StridedSlice-1](@ref openvino_docs_ops_movement_StridedSlice_1)
+* [Transpose-1](@ref openvino_docs_ops_movement_Transpose_1)
+* [Unsqueeze-1](@ref openvino_docs_ops_shape_Unsqueeze_1)
+* [VariadicSplit-1](@ref openvino_docs_ops_movement_VariadicSplit_1)
+
+If an operation is not supported by LPT, the dequantization operation is not propagated, the input tensor precisions are not changed to low precision, and the operation is executed in the original precision.
+
+For example, if you would like to infer a model with a `Convolution` operation in low precision, the model can look as in the picture below:
+
+
+
+> There are several supported quantization approaches on activations and on weights. All supported approaches are described in the [Quantization approaches](#quantization-approaches) section below. The demonstrated model uses the [FakeQuantize operation quantization](#fakequantize-operation) approach.
+
+### Low precision tools
+There are two tools to quantize a model:
+1. [Post-Training Optimization Toolkit](@ref pot_docs_LowPrecisionOptimizationGuide) (POT)
+2. [Neural Network Compression Framework](https://github.com/openvinotoolkit/nncf) (NNCF)
+
+Additionally, low precision transformations can handle ONNX quantized models.
+
+## Quantization approaches
+LPT transformations support two quantization approaches:
+1. `FakeQuantize` operation,
+2. Quantize and dequantization operations
+
+Let's explore both approaches in detail on the `Convolution` operation.
+### FakeQuantize operation
+In this case, a `FakeQuantize` operation is used on activations and a quantized constant on weights. Original input model:
+
+
+
+### Quantize and dequantization operations
+In this case, `FakeQuantize` and `Convert` operations are used as the quantize operation and return a quantized low precision tensor. After the quantize operation on activations, there are `Convert` and dequantization operations to compensate for the decomposition. Original input model:
+
+
+
+In both cases the result is the same. In the LPT result model, you can see that:
+1. if necessary, `FakeQuantize` operations on activations were decomposed into two parts:
+   - a new `FakeQuantize` operation with updated output intervals in the low precision range and low precision output,
+   - dequantization operations on activations;
+2. if necessary, an existing `FakeQuantize` decomposition can be reworked to get better precision;
+3. dequantization operations were propagated through `Convolution`.
+
+LPT result model:
+
+
+
+### Low precision transformations pipeline
+The LPT transformation pipeline has several steps. For each transformation inside one step, the pattern matcher is unique per transformation, but each operation can be assigned to several transformations.
+
+
+
+Inside each step, LPT transformations handle the input model operation by operation, applying the matching pattern of each transformation from the step to the operation and executing the transformation if the pattern matches. The decomposition transformation decomposes `FakeQuantize` into quantize and dequantization operations. The dequantization operations from the previous transformation result are used for the current one, and so on, until the end of the model is reached.
+
+As a result, usually all operations are inferred by the plugin in low precision. If the plugin does not support inference of an operation in low precision, the corresponding LPT transformation can be disabled, and the input tensor precisions for that operation are not changed. In this case, the operation is inferred in the original precision.
+
+The low precision transformations pipeline includes four steps:
+* [Step #1: Prerequisites](@ref openvino_docs_IE_DG_lpt_step1_prerequisites)
+* [Step #2: Markup transformations](@ref openvino_docs_IE_DG_lpt_step2_markup)
+* [Step #3: Main transformations](@ref openvino_docs_IE_DG_lpt_step3_main)
+* [Step #4: Cleanup transformations](@ref openvino_docs_IE_DG_lpt_step4_cleanup)
+
+### Step 1. Prerequisites
+This step fuses and propagates some operations in the model to prepare for the next step. It is required for OpenVINO plugins. Transformations:
+* [PullReshapeThroughDequantization](@ref openvino_docs_IE_DG_lpt_PullReshapeThroughDequantization)
+* [PullTransposeThroughDequantization](@ref openvino_docs_IE_DG_lpt_PullTransposeThroughDequantization)
+* [LinOpSequenceFusion](@ref openvino_docs_IE_DG_lpt_LinOpSequenceFusion)
+
+The model is changed on this step. There are more details in the developer guide: [Prerequisites transformations](@ref openvino_docs_IE_DG_lpt_step1_prerequisites).
+
+### Step 2. Markup
+This step creates runtime attributes for operations. These attributes will be used in the next step. Transformations:
+* [MarkupCanBeQuantized](@ref openvino_docs_IE_DG_lpt_MarkupCanBeQuantized)
+* [MarkupPrecisions](@ref openvino_docs_IE_DG_lpt_MarkupPrecisions)
+* [MarkupPerTensorQuantization](@ref openvino_docs_IE_DG_lpt_MarkupPerTensorQuantization)
+* [MarkupAvgPoolPrecisionPreserved](@ref openvino_docs_IE_DG_lpt_MarkupAvgPoolPrecisionPreserved)
+* [PropagatePrecisions](@ref openvino_docs_IE_DG_lpt_PropagatePrecisions)
+* [AlignQuantizationIntervals](@ref openvino_docs_IE_DG_lpt_AlignQuantizationIntervals)
+* [AlignQuantizationParameters](@ref openvino_docs_IE_DG_lpt_AlignQuantizationParameters)
+
+The model is changed on this step: only new attributes are added to some operations. There are more details in the developer guide: [Markup transformations](@ref openvino_docs_IE_DG_lpt_step2_markup).
+
+### Step 3. Main transformations, FakeQuantize decomposition and dequantization operations handling
+This step has the most transformations. They can be separated into two groups: decomposition transformations and dequantization operations handling. There are more details in the developer guide: [Main transformations](@ref openvino_docs_IE_DG_lpt_step3_main). Transformations:
+* [AddTransformation](@ref openvino_docs_IE_DG_lpt_AddTransformation)
+* [AvgPoolTransformation](@ref openvino_docs_IE_DG_lpt_AvgPoolTransformation)
+* [ClampTransformation](@ref openvino_docs_IE_DG_lpt_ClampTransformation)
+* [ConcatTransformation](@ref openvino_docs_IE_DG_lpt_ConcatTransformation)
+* [ConvolutionTransformation](@ref openvino_docs_IE_DG_lpt_ConvolutionTransformation)
+* [ConvolutionBackpropDataTransformation](@ref openvino_docs_IE_DG_lpt_ConvolutionBackpropDataTransformation)
+* [DepthToSpaceTransformation](@ref openvino_docs_IE_DG_lpt_DepthToSpaceTransformation)
+* [FakeQuantizeDecompositionTransformation](@ref openvino_docs_IE_DG_lpt_FakeQuantizeDecompositionTransformation)
+* [FakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FakeQuantizeTransformation)
+* [InterpolateTransformation](@ref openvino_docs_IE_DG_lpt_InterpolateTransformation)
+* [GroupConvolutionTransformation](@ref openvino_docs_IE_DG_lpt_GroupConvolutionTransformation)
+* [MatMulTransformation](@ref openvino_docs_IE_DG_lpt_MatMulTransformation)
+* [MaxPoolTransformation](@ref openvino_docs_IE_DG_lpt_MaxPoolTransformation)
+* [MultiplyTransformation](@ref openvino_docs_IE_DG_lpt_MultiplyTransformation)
+* [MVNTransformation](@ref openvino_docs_IE_DG_lpt_MVNTransformation)
+* [NormalizeL2Transformation](@ref openvino_docs_IE_DG_lpt_NormalizeL2Transformation)
+* [PReluTransformation](@ref openvino_docs_IE_DG_lpt_PReluTransformation)
+* [ReduceMaxTransformation](@ref openvino_docs_IE_DG_lpt_ReduceMaxTransformation)
+* [ReduceMeanTransformation](@ref openvino_docs_IE_DG_lpt_ReduceMeanTransformation)
+* [ReduceMinTransformation](@ref openvino_docs_IE_DG_lpt_ReduceMinTransformation)
+* [ReduceSumTransformation](@ref openvino_docs_IE_DG_lpt_ReduceSumTransformation)
+* [ReluTransformation](@ref openvino_docs_IE_DG_lpt_ReluTransformation)
+* [ReshapeTransformation](@ref openvino_docs_IE_DG_lpt_ReshapeTransformation)
+* [SqueezeTransformation](@ref openvino_docs_IE_DG_lpt_SqueezeTransformation)
+* [ShuffleChannelsTransformation](@ref openvino_docs_IE_DG_lpt_ShuffleChannelsTransformation)
+* [SplitTransformation](@ref openvino_docs_IE_DG_lpt_SplitTransformation)
+* [StridedSliceTransformation](@ref openvino_docs_IE_DG_lpt_StridedSliceTransformation)
+* [TransposeTransformation](@ref openvino_docs_IE_DG_lpt_TransposeTransformation)
+* [UnsqueezeTransformation](@ref openvino_docs_IE_DG_lpt_UnsqueezeTransformation)
+* [VariadicSplitTransformation](@ref openvino_docs_IE_DG_lpt_VariadicSplitTransformation)
+
+#### Decomposition transformations
+Decomposition transformations decompose the `FakeQuantize` operation into a quantize operation (`FakeQuantize` with low precision output) and dequantization operations (the opposite of quantize, with low precision input and the original precision output). For dequantization, LPT uses three operations: `Convert`, `Subtract`, and `Multiply`. The element-wise operations `Subtract` and `Multiply` have constants on their second branches. If the dequantization operations are not handled at the end of the LPT pipeline, they are fused back into the `FakeQuantize`.
+
+Original `FakeQuantize`:
+
+
+`FakeQuantize` after decomposition to quantization and dequantization operations:
+
+
+#### Dequantization operations handling transformations
+
+In this step, LPT transformations fuse dequantization operations or move them through existing model operations as much as possible.
+
+Original `Convolution` operation in FP32 with dequantization operations before:
+
+
+`Convolution` operation in INT8 after decomposition and dequantization operations handling:
+
+
+### Step 4: Cleanup of the result model
+LPT cleanup transformations are the final stage in the LPT pipeline. In this step, LPT transformations clean up the result model to avoid dequantization operations that were not handled: where possible they fuse dequantization operations into other model operations (and fuse at least the `Convert` operations if not). Transformations:
+* [FoldConvertTransformation](@ref openvino_docs_IE_DG_lpt_FoldConvertTransformation)
+* [FoldFakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FoldFakeQuantizeTransformation)
+* [FuseConvertTransformation](@ref openvino_docs_IE_DG_lpt_FuseConvertTransformation)
+* [FuseMultiplyToFakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FuseMultiplyToFakeQuantizeTransformation)
+* [FuseSubtractToFakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FuseSubtractToFakeQuantizeTransformation)
+* [MultiplyToGroupConvolutionTransformation](@ref openvino_docs_IE_DG_lpt_MultiplyToGroupConvolutionTransformation)
+
+There are more details in the developer guide: [Cleanup transformations](@ref openvino_docs_IE_DG_lpt_step4_cleanup).
+
+`FakeQuantize` operation with dequantization operations that were not handled:
+
+
+`FakeQuantize` operation with fused dequantization operations:
+
+
+## Low precision transformations in plugin transformation pipeline
+A typical transformation pipeline is described below.
+
+### Step 1. Common optimizations
+This step is optional for LPT but is typically present in OpenVINO™ plugins. The step does not use any LPT transformations. First, the step disables constant folding of dequantization operations on the constant subgraph on weights to prevent the loss of dequantization info during the next plugin transformations. After that, it optimizes the nGraph function and converts operations to operation set 1. Typically, using this step is the simplest way to meet the LPT requirements for the input quantized model. If the plugin can guarantee that the LPT input requirements are met, this step can be skipped.
+
+@snippet snippets/lpt_mkldnn_plugin.cpp lpt_common
+
+### Step 2. Low precision transformations execution
+This step is mandatory. It configures and runs LPT transformations.
+
+@snippet snippets/lpt_mkldnn_plugin.cpp lpt_execution
+
+### Step 3. Plugin-specific transformations
+This step is optional. It modifies the nGraph function to a device-specific operation set.
+
+@snippet snippets/lpt_mkldnn_plugin.cpp lpt_device
+
+## Result model overview
+
+Let's explore the quantized [TensorFlow* implementation of ResNet-50](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) model. Use the [Model Downloader](@ref omz_tools_downloader) tool to download the `fp16` model from the [OpenVINO™ Toolkit - Open Model Zoo repository](https://github.com/openvinotoolkit/open_model_zoo):
+```sh
+./downloader.py --name resnet-50-tf --precisions FP16-INT8
+```
+After that, quantize the model with the [Model Quantizer](@ref omz_tools_downloader) tool:
+```sh
+./quantizer.py --model_dir public/resnet-50-tf --dataset_dir <DATASET_DIR> --precisions=FP16-INT8
+```
+
+### Inference
+
+The simplest way to infer the model and collect performance counters is the [Benchmark Application](../../../../samples/cpp/benchmark_app/README.md):
+```sh
+./benchmark_app -m resnet-50-tf.xml -d CPU -niter 1 -api sync -report_type average_counters -report_folder pc_report_dir
+```
+If you infer the model with the OpenVINO™ CPU plugin and collect performance counters, all operations (except the last, not quantized, SoftMax) are executed in INT8 precision.
+
+### Results analysis
+
+The result model depends on different factors:
+* The original model's quantization possibility and quantization quality. For some models, some operations cannot be quantized by the POT and NNCF tools. In this case, `FakeQuantize` operations are absent before these operations, and they are inferred in the original precision.
+* LPT customization and plugin-supported operations. If the plugin does not support INT8 inference for some operation, the corresponding LPT transformation should be disabled, and the operation is inferred in the original precision.
+
+Information about layer precision is stored in the performance counters that are
+available from the Inference Engine API. For example, the part of the performance counters table for the quantized [TensorFlow* implementation of ResNet-50](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) model inference on the CPU plugin looks as follows:
+
+| layerName                                                  | execStatus | layerType    | execType             | realTime (ms) | cpuTime (ms) |
+| ---------------------------------------------------------- | ---------- | ------------ | -------------------- | ------------- | ------------ |
+| resnet\_model/batch\_normalization\_15/FusedBatchNorm/Add  | EXECUTED   | Convolution  | jit\_avx512\_1x1\_I8 | 0.377         | 0.377        |
+| resnet\_model/conv2d\_16/Conv2D/fq\_input\_0               | NOT\_RUN   | FakeQuantize | undef                | 0             | 0            |
+| resnet\_model/batch\_normalization\_16/FusedBatchNorm/Add  | EXECUTED   | Convolution  | jit\_avx512\_I8      | 0.499         | 0.499        |
+| resnet\_model/conv2d\_17/Conv2D/fq\_input\_0               | NOT\_RUN   | FakeQuantize | undef                | 0             | 0            |
+| resnet\_model/batch\_normalization\_17/FusedBatchNorm/Add  | EXECUTED   | Convolution  | jit\_avx512\_1x1\_I8 | 0.399         | 0.399        |
+| resnet\_model/add\_4/fq\_input\_0                          | NOT\_RUN   | FakeQuantize | undef                | 0             | 0            |
+| resnet\_model/add\_4                                       | NOT\_RUN   | Eltwise      | undef                | 0             | 0            |
+| resnet\_model/add\_5/fq\_input\_1                          | NOT\_RUN   | FakeQuantize | undef                | 0             | 0            |
+
+> The `execStatus` column of the table includes the following possible values:
+> - `EXECUTED` - the layer was executed by a standalone primitive,
+> - `NOT_RUN` - the layer was not executed by a standalone primitive or was fused with another operation and executed in another layer primitive.
+>
+> The `execType` column of the table includes inference primitives with specific suffixes. The layers have the following marks:
+> * Suffix `I8` for layers that had 8-bit data type input and were computed in 8-bit precision
+> * Suffix `FP32` for layers computed in 32-bit precision
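
The same per-layer information can be read programmatically. Below is a minimal sketch using the Inference Engine C++ API; the model path is a placeholder, and error handling plus input preparation are omitted.

```cpp
// Print per-layer performance counters, including the execution type and status.
// The model path is a placeholder; inputs are left default-initialized.
#include <iostream>
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("resnet-50-tf.xml");
    auto executable = core.LoadNetwork(network, "CPU",
        {{CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(YES)}});
    auto request = executable.CreateInferRequest();
    request.Infer();

    for (const auto& counter : request.GetPerformanceCounts()) {
        const auto& info = counter.second;  // InferenceEngine::InferenceEngineProfileInfo
        std::cout << counter.first
                  << " execType=" << info.exec_type
                  << " layerType=" << info.layer_type
                  << " realTime(us)=" << info.realTime_uSec << std::endl;
    }
    return 0;
}
```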
+
+As a result, all operations (except the not quantized `SoftMax` at the end of the model) in the OpenVINO™ CPU plugin are inferred in low precision. Note that the result model contains `FakeQuantize` operations in FP32, but it is the plugin's responsibility to fuse these operations with the previous operations. The OpenVINO™ CPU plugin achieves maximally optimized inference for all operations by fusing an INT8 `Convolution` that has FP32 output with a `FakeQuantize` operation that has FP32 input and INT8 output. In this case, the OpenVINO™ CPU plugin uses INT8 and FP32 vectorized instructions but reports the usage of one INT8 kernel for inference, which is the most optimized for this case.
+
+## Mixed precision
+If an LPT input model operation output has `fp16` precision, the dequantization computations still occur in `fp32` precision. This approach is used to avoid accuracy loss in `fp16` arithmetic computations. Note that the last dequantization operation output has `fp16` precision.
+
+## Customization
+Low Precision Transformations can be customized. Built-in customization options:
+* operation precision restrictions,
+* operation per-tensor quantization restrictions,
+* update precisions,
+* dequantization precision.
+
+### Operation precision restrictions
+This option defines the precisions allowed for the operation input ports. The option value is passed as an input argument to the `LowPrecision` constructor. For example:
+
+@snippet snippets/lpt_mkldnn_plugin.cpp lpt_supported_precisions
+
+In the provided example, in the result model the `Convolution` operation inputs must have specific precisions: `u8` (unsigned int8) precision on input 0 (on activations) and `i8` (signed int8) precision on input 1 (on weights).
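
The referenced snippet is not reproduced here. As an approximation only, a restriction like the one described might be declared as in the fragment below, assuming the `ngraph::pass::low_precision::OperationPrecisionRestriction` helper of this release; the exact class and header names depend on the LPT library version.

```cpp
// Illustrative fragment (not a complete translation unit), assuming the
// OperationPrecisionRestriction helper: Convolution gets u8 activations and i8 weights.
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
    ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
        {0, {ngraph::element::u8}},  // input 0: activations
        {1, {ngraph::element::i8}},  // input 1: weights
    }),
});
```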
+
+### Operation per-tensor quantization restrictions
+This option defines whether an operation supports per-tensor quantization only. The option value is passed as an input argument to the `LowPrecision` constructor. For example:
+
+@snippet snippets/lpt_mkldnn_plugin.cpp per_tensor_quantization
+
+In the provided example, in the result model `Convolution` operations must have per-tensor quantization on input 0 (on activations).
+
+### Update precisions
+This option defines whether each LPT transformation updates precisions. The option value is boolean and is passed as the `updatePrecisions` member of `LayerTransformation::Params`, which is an input argument of the `LowPrecision` constructor. All transformations are affected. If `true`, low precision transformations update precisions to low precision; if `false`, they do not. Typically, this option is used for plugin debugging.
+
+### Typical customization use cases
+
+Plugin-specific customization can be implemented via nGraph transformation callbacks. For example, asymmetric quantization support can be customized via the `LayerTransformation::isAsymmetricQuantization` and `WeightableLayerTransformation::isAsymmetricOnWeights` methods used in callbacks. For example:
+
+@snippet snippets/lpt_mkldnn_plugin.cpp asymmetric_quantization
@@ -0,0 +1,56 @@
+# Attributes {#openvino_docs_IE_DG_lpt_attributes}
+
+@sphinxdirective
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Attributes
+   :hidden:
+
+   AvgPoolPrecisionPreserved <openvino_docs_IE_DG_lpt_AvgPoolPrecisionPreserved>
+   IntervalsAlignment <openvino_docs_IE_DG_lpt_IntervalsAlignment>
+   PerTensorQuantization <openvino_docs_IE_DG_lpt_PerTensorQuantization>
+   PrecisionPreserved <openvino_docs_IE_DG_lpt_PrecisionPreserved>
+   Precisions <openvino_docs_IE_DG_lpt_Precisions>
+   QuantizationAlignment <openvino_docs_IE_DG_lpt_QuantizationAlignment>
+
+@endsphinxdirective
+
+## Introduction
+
+| Name                                                                                 | Target                 | Required | Mutable |
+|--------------------------------------------------------------------------------------|------------------------|----------|---------|
+| [AvgPoolPrecisionPreserved](@ref openvino_docs_IE_DG_lpt_AvgPoolPrecisionPreserved)  | Precision              | No       | Yes     |
+| [IntervalsAlignment](@ref openvino_docs_IE_DG_lpt_IntervalsAlignment)                | Quantization interval  | Yes      | Yes     |
+| [PerTensorQuantization](@ref openvino_docs_IE_DG_lpt_PerTensorQuantization)          | Precision              | Yes      | No      |
+| [PrecisionPreserved](@ref openvino_docs_IE_DG_lpt_PrecisionPreserved)                | Precision              | Yes      | Yes     |
+| [Precisions](@ref openvino_docs_IE_DG_lpt_Precisions)                                | Precision              | Yes      | Yes     |
+| [QuantizationAlignment](@ref openvino_docs_IE_DG_lpt_QuantizationAlignment)          | Quantization alignment | Yes      | Yes     |
+
+> The `Target` attribute group defines attribute usage during model transformation for the best performance:
+> - `Precision` - the attribute defines the most optimal output port precision.
+> - `Quantization interval` - the attribute defines the quantization interval.
+> - `Quantization alignment` - the attribute defines the quantization alignment: per-channel or per-tensor quantization.
+>
+> The `Required` attribute group defines whether attribute usage is required to get an optimal model during transformation:
+> - `Yes` - the attribute is used by all OpenVINO plugins for low-precision optimization.
+> - `No` - the attribute is used in a specific OpenVINO plugin.
+>
+> The `Mutable` attribute group defines whether a transformation can update an existing attribute:
+> - `Yes` - the attribute can be updated by the next transformations in the pipeline. But the attribute update order is still important.
+> - `No` - an existing attribute cannot be updated by the next transformation. The previously handled transformation has already optimized the model according to the current value.
+
+`FakeQuantize` decomposition is a mandatory part of low precision transformations. Attributes used during decomposition are mandatory. Optional attributes are required only for certain operations.
+
+Attributes usage by transformations:
+
+| Attribute name            | Created by transformations                        | Used by transformations                                                                                                            |
+|---------------------------|---------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------|
+| PrecisionPreserved        | MarkupPrecisions, MarkupAvgPoolPrecisionPreserved | AlignQuantizationIntervals, AlignQuantizationParameters, FakeQuantizeDecompositionTransformation, MarkupAvgPoolPrecisionPreserved |
+| AvgPoolPrecisionPreserved | MarkupAvgPoolPrecisionPreserved                   |                                                                                                                                      |
+| Precisions                | MarkupCanBeQuantized, MarkupPrecisions            | FakeQuantizeDecompositionTransformation                                                                                             |
+| PerTensorQuantization     | MarkupPerTensorQuantization                       |                                                                                                                                      |
+| IntervalsAlignment        | AlignQuantizationIntervals                        | FakeQuantizeDecompositionTransformation                                                                                             |
+| QuantizationAlignment     | AlignQuantizationParameters                       | FakeQuantizeDecompositionTransformation                                                                                             |
+
+> **Note:** the same type of attribute instances can be created in different transformations. This approach is the result of the transformation single-responsibility principle. For example, `Precision` attribute instances are created in the `MarkupCanBeQuantized` and `MarkupPrecisions` transformations, but the reasons for their creation are different.
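
These attributes are stored in the runtime information of nGraph nodes, so after the markup step they can be inspected from plugin code. The sketch below only prints the attribute keys, because the concrete key strings and value types are version specific.

```cpp
// List which runtime attributes were attached to each node (for example, by the
// markup transformations). Only the attribute keys are printed.
#include <iostream>
#include <memory>
#include <ngraph/function.hpp>
#include <ngraph/node.hpp>

void dump_rt_info(const std::shared_ptr<ngraph::Function>& function) {
    for (const auto& node : function->get_ordered_ops()) {
        const auto& rt_info = node->get_rt_info();
        if (rt_info.empty())
            continue;
        std::cout << node->get_friendly_name() << ":" << std::endl;
        for (const auto& entry : rt_info)
            std::cout << "  " << entry.first << std::endl;
    }
}
```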
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a79d152dae50fd3afaa78d8e18de7d279bb1c79b3e4d5c68fffed52a7c51b18
+size 383875
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d54234622f538249dd5ccb5156cc10dd9b5bb40e800f6d1d906a0ff44ecabcf4
+size 388893
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3132bad01388adf7f788592538194bceb6b94f76f1c3788ffb73b76b19a74990
+size 393300
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f5a98e0ae8dc1f21dd0458ad9ed61de68b134e1128279c3e8b4e700ff3648f8
+size 398967
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2618a80fd1be4d25dfc1f7e57e046a7844c9933a6fed316a0660c3051325557e
+size 474998
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b7750b3424540912ec590aa5b56cba9e4f2f9db6d45c23aed1d78d094321230
+size 488940
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7836c25a0db5a5f08adf5539fb5ee29f52bc7923148dc42f4c78d3354b7b8464
+size 520539
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:911d9730e6762a9919fe3a48f0c87a44a5aeac97468f2d28c5174c13c69ad74b
+size 351583
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06caa4dc97b00f150395abc230bc90822f3bfa4e0bb3b65019f111a5a40e1d1c
+size 520155
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f19d8f068afa4aa62fc04cfa0d2678e6bfe3f90c164a08f588bff9685854030
+size 661189
@@ -0,0 +1,6 @@
+# Step 1. Prerequisites Transformations {#openvino_docs_IE_DG_lpt_step1_prerequisites}
+
+Prerequisites transformations are optional. The transformations prepare a model before running other low precision transformations. The transformations do not operate with dequantization operations or update precisions. Prerequisites transformations include:
+* [PullReshapeThroughDequantization](@ref openvino_docs_IE_DG_lpt_PullReshapeThroughDequantization)
+* [PullTransposeThroughDequantization](@ref openvino_docs_IE_DG_lpt_PullTransposeThroughDequantization)
+* [LinOpSequenceFusion](@ref openvino_docs_IE_DG_lpt_LinOpSequenceFusion)
@ -0,0 +1,140 @@
|
|||||||
|
# Step 2. Markup Transformations {#openvino_docs_IE_DG_lpt_step2_markup}
|
||||||
|
|
||||||
|
This step defines the optimal `FakeQuantize` decomposition precisions for the best inference performance via operations markup with runtime attribute instances. Attributes are created for input and output ports and operations. Transformations do not change the operation output port precisions. A model markup low precision logic is decomposed and implemented into the following common markup transformations. The order of transformations is important:
|
||||||
|
|
||||||
|
1. [MarkupCanBeQuantized](@ref openvino_docs_IE_DG_lpt_MarkupCanBeQuantized)
|
||||||
|
2. [MarkupPrecisions](@ref openvino_docs_IE_DG_lpt_MarkupPrecisions)
|
||||||
|
3. [MarkupPerTensorQuantization](@ref openvino_docs_IE_DG_lpt_MarkupPerTensorQuantization)
|
||||||
|
4. [MarkupAvgPoolPrecisionPreserved](@ref openvino_docs_IE_DG_lpt_MarkupAvgPoolPrecisionPreserved)
|
||||||
|
5. [PropagatePrecisions](@ref openvino_docs_IE_DG_lpt_PropagatePrecisions)
|
||||||
|
6. [AlignQuantizationIntervals](@ref openvino_docs_IE_DG_lpt_AlignQuantizationIntervals)
|
||||||
|
7. [AlignQuantizationParameters](@ref openvino_docs_IE_DG_lpt_AlignQuantizationParameters)
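The sketch below shows approximately how this ordered pipeline could be assembled with an `ngraph::pass::Manager`. The `supportedPrecisions` and `perTensorQuantization` variables stand for the restriction lists passed to `MarkupPrecisions` and `MarkupPerTensorQuantization` (their construction is sketched in the `MarkupPrecisions` section below); exact constructor signatures may differ between releases, so treat this as an approximation of the pipeline referenced by the `lpt_markup_pipeline` snippet rather than a verbatim copy.

```cpp
// Approximate markup pipeline: the seven common markup transformations registered in
// the order required by this step. Includes are omitted; supportedPrecisions and
// perTensorQuantization are restriction lists (see the MarkupPrecisions and
// MarkupPerTensorQuantization sections), and "model" is the ngraph::Function being
// transformed. Signatures are illustrative and may differ between releases.
ngraph::pass::Manager markup;
markup.register_pass<ngraph::pass::low_precision::MarkupCanBeQuantized>();
markup.register_pass<ngraph::pass::low_precision::MarkupPrecisions>(supportedPrecisions);
markup.register_pass<ngraph::pass::low_precision::MarkupPerTensorQuantization>(perTensorQuantization);
markup.register_pass<ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved>();
markup.register_pass<ngraph::pass::low_precision::PropagatePrecisions>();
markup.register_pass<ngraph::pass::low_precision::AlignQuantizationIntervals>();
markup.register_pass<ngraph::pass::low_precision::AlignQuantizationParameters>();
markup.run_passes(model);
```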
|
||||||
|
|
||||||
|
The table of transformations and used attributes:
|
||||||
|
|
||||||
|
| Transformation name | Create attributes | Use attributes |
|
||||||
|
|---------------------------------|-------------------------------|-------------------------------------------|
|
||||||
|
| MarkupCanBeQuantized | Precisions | |
|
||||||
|
| MarkupPrecisions | Precisions,PrecisionPreserved | |
|
||||||
|
| MarkupPerTensorQuantization | PerTensorQuantization | |
|
||||||
|
| MarkupAvgPoolPrecisionPreserved | AvgPoolPrecisionPreserved | Precisions, PrecisionPreserved |
|
||||||
|
| PropagatePrecisions | Precisions | Precisions, PrecisionPreserved |
|
||||||
|
| AlignQuantizationIntervals | IntervalsAlignment | PrecisionPreserved |
|
||||||
|
| AlignQuantizationParameters | QuantizationAlignment | PrecisionPreserved, PerTensorQuantization |
|
||||||
|
|
||||||
|
> **Note:** the same type of attribute instances can be created in different transformations. This approach is a result of the single-responsibility principle applied to transformations. For example, `Precisions` attribute instances are created in both the `MarkupCanBeQuantized` and `MarkupPrecisions` transformations, but the reasons for their creation are different.
|
||||||
|
|
||||||
|
Common markup transformations can be decomposed into simpler utility markup transformations. The order of the markup utility transformations is not important:
|
||||||
|
* [CreateAttribute](@ref openvino_docs_IE_DG_lpt_CreateAttribute)
|
||||||
|
* [CreatePrecisionsDependentAttribute](@ref openvino_docs_IE_DG_lpt_CreatePrecisionsDependentAttribute)
|
||||||
|
* [PropagateThroughPrecisionPreserved](@ref openvino_docs_IE_DG_lpt_PropagateThroughPrecisionPreserved)
|
||||||
|
* [PropagateToInput](@ref openvino_docs_IE_DG_lpt_PropagateToInput)
|
||||||
|
* [UpdateSharedPrecisionPreserved](@ref openvino_docs_IE_DG_lpt_UpdateSharedPrecisionPreserved)
|
||||||
|
|
||||||
|
Let's explore all the transformations and their relations in detail, using the same example model:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
The original model key features:
|
||||||
|
* The first concatenation operation, `concat1`, has a non-quantized `convolution1` consumer.
|
||||||
|
* The second concatenation operation, `concat2`, has a quantized `convolution2` consumer with the following requirements:
|
||||||
|
- support `unsigned int8` on activations,
|
||||||
|
- per-tensor quantization.
|
||||||
|
* Between the `concat2` concatenation operation and the `Convolution` there is an `AvgPool` operation, which mathematically should return an `f32` tensor. However, the `MarkupAvgPoolPrecisionPreserved` transformation is active, which allows the low precision transformations that come after the `AvgPool` to propagate a low precision tensor to the next consumer.
|
||||||
|
|
||||||
|
Transformations are run with the following parameters:
|
||||||
|
|
||||||
|
@snippet snippets/lpt_mkldnn_plugin.cpp lpt_markup_pipeline
|
||||||
|
|
||||||
|
## 1. MarkupCanBeQuantized
|
||||||
|
The transformation marks operations that cannot be quantized. No attributes are required before the transformation.
|
||||||
|
|
||||||
|
Changes in the example model after `MarkupCanBeQuantized` transformation:
|
||||||
|
* The non-quantized `convolution1` operation is marked by the `Precisions` attribute with empty values. This attribute allows the subsequent transformations to ignore the non-quantized operation.
|
||||||
|
|
||||||
|
Result model:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Model display features (here and below):
|
||||||
|
* The attributes added by the current transformation are marked in bold.
|
||||||
|
* If the attributes do not fit on one line, each line shows only one attribute.
|
||||||
|
|
||||||
|
## 2. MarkupPrecisions
|
||||||
|
The transformation is required and includes two tasks:
|
||||||
|
1. Mark operation input ports (create a `Precisions` attribute instance) according to the provided restrictions: an input port index and the required precisions. The restrictions are provided as an input argument to the `ngraph::pass::low_precision::LowPrecision` constructor (see the sketch after this list).
|
||||||
|
2. Mark precision preserved operations.
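A minimal sketch of how such precision restrictions might be constructed and passed to the `LowPrecision` pass is shown below. The `OperationPrecisionRestriction` and `OperationPerTensorQuantizationRestriction` helper names follow the LPT snippets of the same period, but the exact names and signatures may differ between releases, so treat this as an assumption-laden illustration rather than the definitive API.

```cpp
// Illustrative sketch: restrict quantized Convolution to u8 activations (port 0) and
// i8 weights (port 1), and require per-tensor quantization on its activations.
// Helper class names and signatures are assumptions based on the LPT snippets and may
// differ in your OpenVINO version. "model" is a std::shared_ptr<ngraph::Function>.
using namespace ngraph::pass::low_precision;

auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
    OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
        {0, {ngraph::element::u8}},   // activations
        {1, {ngraph::element::i8}},   // weights
    }),
});

auto perTensorQuantization = std::vector<OperationPerTensorQuantizationRestriction>({
    OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0}),
});

ngraph::pass::Manager manager;
manager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization);
manager.run_passes(model);
```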
|
||||||
|
|
||||||
|
No attributes are required before the transformation. Changes in the example model after `MarkupPrecisions` transformation:
|
||||||
|
* Both concatenation operations are marked as precision preserved operations, which allows precision to be propagated through them.
|
||||||
|
* The quantized `convolution2` operation is marked by the `Precisions` attribute with `u8` precision on activations and `i8` precision on weights, according to the provided restrictions. This attribute instance specifies which precisions are required for the quantized `Convolution` operation.
|
||||||
|
|
||||||
|
Result model:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
## 3. MarkupPerTensorQuantization
|
||||||
|
The transformation is required and marks operations (creates a `PerTensorQuantization` attribute instance) according to the provided restrictions: an operation that requires per-tensor quantization. No attributes are required before the transformation.
|
||||||
|
|
||||||
|
Changes in the example model after `MarkupPerTensorQuantization` transformation:
|
||||||
|
* Both `Convolution` operations are marked by the `PerTensorQuantization` attribute.
|
||||||
|
|
||||||
|
Result model:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
## 4. MarkupAvgPoolPrecisionPreserved
|
||||||
|
The transformation is optional. `MarkupAvgPoolPrecisionPreserved` marks `AvgPool` operations as precision preserved or not. An `AvgPool` operation is precision preserved if the next non-precision-preserved operation can be inferred in low precision. In other words, `AvgPool` operations become precision preserved operations to speed up model inference. The transformation uses `PrecisionPreserved` attributes created earlier. The transformation is composite and uses:
|
||||||
|
* CreatePrecisionsDependentAttribute
|
||||||
|
* PropagateThroughPrecisionPreserved
|
||||||
|
* UpdateSharedPrecisionPreserved
|
||||||
|
|
||||||
|
Changes in the example model after `MarkupAvgPoolPrecisionPreserved` transformation:
|
||||||
|
* `AvgPool` operations are marked by `PrecisionPreserved` and `AvgPoolPrecisionPreserved` (not used below).
|
||||||
|
|
||||||
|
Result model:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
## 5. PropagatePrecisions
|
||||||
|
The transformation is required. `PropagatePrecisions` is a key transformation in the markup pipeline, which marks `FakeQuantize` output port precisions. The transformation uses `PrecisionPreserved` attribute instances created earlier. The transformation is composite and uses:
|
||||||
|
|
||||||
|
* CreateAttribute
|
||||||
|
* PropagateThroughPrecisionPreserved
|
||||||
|
* PropagateToInput
|
||||||
|
|
||||||
|
Changes in the example model after `PropagatePrecisions` transformation:
|
||||||
|
* All precision preserved operations are marked by the `Precisions` attribute instance, which defines the required precision for the operation.
|
||||||
|
* `FakeQuantize` operation output ports are marked by `Precisions` attribute instances, which define the target precision for decomposition. In the sample model, the `FakeQuantize` operations have signed intervals, but the `Precisions` attributes are initialized to `u8` (`unsigned int8`) values as a result of the restrictions applied to `Convolution` operations during the transformations.
|
||||||
|
|
||||||
|
Result model:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
> **NOTE**: `AlignQuantizationIntervals` and `AlignQuantizationParameters` transformations are required if the model has quantized concatenation operations.
|
||||||
|
|
||||||
|
## 6. AlignQuantizationIntervals
|
||||||
|
The transformation is required for models with quantized operations. The transformation marks `FakeQuantize` operations and their precision preserved consumers to combine quantization information from different `FakeQuantize` operations for future quantization interval alignment. The transformation is composite and uses:
|
||||||
|
* CreateAttribute
|
||||||
|
* PropagateThroughPrecisionPreserved
|
||||||
|
|
||||||
|
Changes in the example model after `AlignQuantizationIntervals` transformation:
|
||||||
|
* All `FakeQuantize` operations and their precision preserved consumers are marked by the `IntervalsAlignment` attribute instance.
|
||||||
|
|
||||||
|
Result model:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
## 7. AlignQuantizationParameters
|
||||||
|
The transformation is required for models with a quantized concatenation operation. The transformation marks `FakeQuantize` precision preserved consumers to align quantization intervals. The transformation is composite and uses:
|
||||||
|
* CreateAttribute
|
||||||
|
* PropagateThroughPrecisionPreserved
|
||||||
|
* UpdateSharedPrecisionPreserved
|
||||||
|
|
||||||
|
|
||||||
|
Changes in the example model after `AlignQuantizationParameters` transformation:
|
||||||
|
* All `FakeQuantize` precision preserved consumers are marked by a `QuantizationAlignment` attribute instance. The `convolution1` input ports are marked by `Precisions` attribute instances with an empty precisions collection, so the `convolution1` operation is detected as not quantized and the `QuantizationAlignment` attribute keeps its default value `false`. The `convolution2` input ports are marked by `Precisions` attribute instances with a non-empty precisions collection, so the `convolution2` operation is detected as quantized with the `PerTensorQuantization` attribute and the `QuantizationAlignment` attribute default value is changed to `true`.
|
||||||
|
|
||||||
|
Final model:
|
||||||
|
|
||||||
|

|
@ -0,0 +1,49 @@
|
|||||||
|
# Step 3. Main Transformations {#openvino_docs_IE_DG_lpt_step3_main}
|
||||||
|
|
||||||
|
Main transformations make up the majority of low precision transformations. These transformations operate with dequantization operations. Main transformations include:
|
||||||
|
* [AddTransformation](@ref openvino_docs_IE_DG_lpt_AddTransformation)
|
||||||
|
* [AvgPoolTransformation](@ref openvino_docs_IE_DG_lpt_AvgPoolTransformation)
|
||||||
|
* [ClampTransformation](@ref openvino_docs_IE_DG_lpt_ClampTransformation)
|
||||||
|
* [ConcatTransformation](@ref openvino_docs_IE_DG_lpt_ConcatTransformation)
|
||||||
|
* [ConvolutionTransformation](@ref openvino_docs_IE_DG_lpt_ConvolutionTransformation)
|
||||||
|
* [ConvolutionBackpropDataTransformation](@ref openvino_docs_IE_DG_lpt_ConvolutionBackpropDataTransformation)
|
||||||
|
* [DepthToSpaceTransformation](@ref openvino_docs_IE_DG_lpt_DepthToSpaceTransformation)
|
||||||
|
* [FakeQuantizeDecompositionTransformation](@ref openvino_docs_IE_DG_lpt_FakeQuantizeDecompositionTransformation)
|
||||||
|
* [FakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FakeQuantizeTransformation)
|
||||||
|
* [InterpolateTransformation](@ref openvino_docs_IE_DG_lpt_InterpolateTransformation)
|
||||||
|
* [GroupConvolutionTransformation](@ref openvino_docs_IE_DG_lpt_GroupConvolutionTransformation)
|
||||||
|
* [MatMulTransformation](@ref openvino_docs_IE_DG_lpt_MatMulTransformation)
|
||||||
|
* [MaxPoolTransformation](@ref openvino_docs_IE_DG_lpt_MaxPoolTransformation)
|
||||||
|
* [MultiplyTransformation](@ref openvino_docs_IE_DG_lpt_MultiplyTransformation)
|
||||||
|
* [MVNTransformation](@ref openvino_docs_IE_DG_lpt_MVNTransformation)
|
||||||
|
* [NormalizeL2Transformation](@ref openvino_docs_IE_DG_lpt_NormalizeL2Transformation)
|
||||||
|
* [PReluTransformation](@ref openvino_docs_IE_DG_lpt_PReluTransformation)
|
||||||
|
* [ReduceMaxTransformation](@ref openvino_docs_IE_DG_lpt_ReduceMaxTransformation)
|
||||||
|
* [ReduceMeanTransformation](@ref openvino_docs_IE_DG_lpt_ReduceMeanTransformation)
|
||||||
|
* [ReduceMinTransformation](@ref openvino_docs_IE_DG_lpt_ReduceMinTransformation)
|
||||||
|
* [ReduceSumTransformation](@ref openvino_docs_IE_DG_lpt_ReduceSumTransformation)
|
||||||
|
* [ReluTransformation](@ref openvino_docs_IE_DG_lpt_ReluTransformation)
|
||||||
|
* [ReshapeTransformation](@ref openvino_docs_IE_DG_lpt_ReshapeTransformation)
|
||||||
|
* [SqueezeTransformation](@ref openvino_docs_IE_DG_lpt_SqueezeTransformation)
|
||||||
|
* [ShuffleChannelsTransformation](@ref openvino_docs_IE_DG_lpt_ShuffleChannelsTransformation)
|
||||||
|
* [SplitTransformation](@ref openvino_docs_IE_DG_lpt_SplitTransformation)
|
||||||
|
* [StridedSliceTransformation](@ref openvino_docs_IE_DG_lpt_StridedSliceTransformation)
|
||||||
|
* [TransposeTransformation](@ref openvino_docs_IE_DG_lpt_TransposeTransformation)
|
||||||
|
* [UnsqueezeTransformation](@ref openvino_docs_IE_DG_lpt_UnsqueezeTransformation)
|
||||||
|
* [VariadicSplitTransformation](@ref openvino_docs_IE_DG_lpt_VariadicSplitTransformation)
|
||||||
|
|
||||||
|
Let's explore some of the main transformations using the example model. Original model:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Result model after main transformations:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Changes in the example model after the main transformations:
|
||||||
|
* All `FakeQuantize` operations (`fakeQuantize1`, `fakeQuantize2` and `fakeQuantize3`) were decomposed (see the numeric illustration after this list):
|
||||||
|
- the original `FakeQuantize` operations were replaced with new ones with different output intervals and output port precisions,
|
||||||
|
- dequantization operations were added after them.
|
||||||
|
* Dequantization operations were moved through the precision preserved (`concat1` and `concat2`) and quantized (`convolution2`) operations.
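As an illustration of this decomposition (the numbers are hypothetical and not taken from the model above): a `FakeQuantize` operation with an `f32` output interval [-1.28, 1.27] and 256 levels can be decomposed into a `FakeQuantize` with a [0, 255] output interval and `u8` output port precision, followed by the dequantization operations `Convert` (`u8` to `f32`), `Subtract` (128) and `Multiply` (0.01), which restore the original values:

\f[
x \approx 0.01 * (q - 128), \quad q \in [0, 255] \Rightarrow x \in [-1.28, 1.27]
\f]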
|
||||||
|
|
||||||
|
> **Note:** the left branch (branch #1) does not require per-tensor quantization; as a result, the `fakeQuantize1` output interval is [0, 255]. But the quantized `convolution2` requires per-tensor quantization on the right branch (branch #2). Therefore, the intervals of all connected `FakeQuantize` operations (`fakeQuantize1` and `fakeQuantize2`) are aligned to have per-tensor quantization after the concatenation (`concat2`) operation.
|
@ -0,0 +1,8 @@
|
|||||||
|
# Step 4. Cleanup Transformations {#openvino_docs_IE_DG_lpt_step4_cleanup}
|
||||||
|
|
||||||
|
* [FoldConvertTransformation](@ref openvino_docs_IE_DG_lpt_FoldConvertTransformation)
|
||||||
|
* [FoldFakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FoldFakeQuantizeTransformation)
|
||||||
|
* [FuseConvertTransformation](@ref openvino_docs_IE_DG_lpt_FuseConvertTransformation)
|
||||||
|
* [FuseMultiplyToFakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FuseMultiplyToFakeQuantizeTransformation)
|
||||||
|
* [FuseSubtractToFakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FuseSubtractToFakeQuantizeTransformation)
|
||||||
|
* [MultiplyToGroupConvolutionTransformation](@ref openvino_docs_IE_DG_lpt_MultiplyToGroupConvolutionTransformation)
|
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:288dec05908449cc3fa5e07700fac5cbdff17bb4b4035a4ee83c44cbc6c22c70
|
||||||
|
size 59664
|
After Width: | Height: | Size: 15 KiB |
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:8e345c0b2b5fe365ed298d40d3add4b06a8106096186f68dccb5131c01194e72
|
||||||
|
size 102546
|
After Width: | Height: | Size: 23 KiB |
@ -0,0 +1,3 @@
|
|||||||
|
# ConvertSubtractConstant transformation {#openvino_docs_IE_DG_lpt_ConvertSubtractConstant}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::ConvertSubtractConstant class represents the `ConvertSubtractConstant` transformation.
|
@ -0,0 +1,5 @@
|
|||||||
|
# LinOpSequenceFusion transformation {#openvino_docs_IE_DG_lpt_LinOpSequenceFusion}
|
||||||
|
|
||||||
|
ngraph::pass::LinOpSequenceFusion class represents the `LinOpSequenceFusion` transformation.
|
||||||
|
|
||||||
|
`LinOpSequenceFusion` is a common nGraph transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# PullReshapeThroughDequantization transformation {#openvino_docs_IE_DG_lpt_PullReshapeThroughDequantization}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::PullReshapeThroughDequantization class represents the `PullReshapeThroughDequantization` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# PullTransposeThroughDequantization transformation {#openvino_docs_IE_DG_lpt_PullTransposeThroughDequantization}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::PullTransposeThroughDequantization class represents the `PullTransposeThroughDequantization` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# AlignQuantizationIntervals transformation {#openvino_docs_IE_DG_lpt_AlignQuantizationIntervals}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::AlignQuantizationIntervals class represents the `AlignQuantizationIntervals` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# AlignQuantizationParameters transformation {#openvino_docs_IE_DG_lpt_AlignQuantizationParameters}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::AlignQuantizationParameters class represents the `AlignQuantizationParameters` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# CreateAttribute transformation {#openvino_docs_IE_DG_lpt_CreateAttribute}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::CreateAttribute class represents the `CreateAttribute` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# CreatePrecisionsDependentAttribute transformation {#openvino_docs_IE_DG_lpt_CreatePrecisionsDependentAttribute}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::CreatePrecisionsDependentAttribute class represents the `CreatePrecisionsDependentAttribute` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# MarkupAvgPoolPrecisionPreserved transformation {#openvino_docs_IE_DG_lpt_MarkupAvgPoolPrecisionPreserved}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved class represents the `MarkupAvgPoolPrecisionPreserved` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# MarkupCanBeQuantized transformation {#openvino_docs_IE_DG_lpt_MarkupCanBeQuantized}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::MarkupCanBeQuantized class represents the `MarkupCanBeQuantized` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# MarkupPerTensorQuantization transformation {#openvino_docs_IE_DG_lpt_MarkupPerTensorQuantization}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::MarkupPerTensorQuantization class represents the `MarkupPerTensorQuantization` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# MarkupPrecisions transformation {#openvino_docs_IE_DG_lpt_MarkupPrecisions}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::MarkupPrecisions class represents the `MarkupPrecisions` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# PropagatePrecisions transformation {#openvino_docs_IE_DG_lpt_PropagatePrecisions}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::PropagatePrecisions class represents the `PropagatePrecisions` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# PropagateSharedValue transformation {#openvino_docs_IE_DG_lpt_PropagateSharedValue}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::PropagateSharedValue class represents the `PropagateSharedValue` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# PropagateThroughPrecisionPreserved transformation {#openvino_docs_IE_DG_lpt_PropagateThroughPrecisionPreserved}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::PropagateThroughPrecisionPreserved class represents the `PropagateThroughPrecisionPreserved` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# PropagateToInput transformation {#openvino_docs_IE_DG_lpt_PropagateToInput}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::PropagateToInput class represents the `PropagateToInput` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# UpdateSharedPrecisionPreserved transformation {#openvino_docs_IE_DG_lpt_UpdateSharedPrecisionPreserved}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::UpdateSharedPrecisionPreserved class represents the `UpdateSharedPrecisionPreserved` transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# ClampTransformation transformation {#openvino_docs_IE_DG_lpt_ClampTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::ClampTransformation class represents the `Clamp` operation transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# PReluTransformation transformation {#openvino_docs_IE_DG_lpt_PReluTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::PReluTransformation class represents the `PRelu` operation transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# ReluTransformation transformation {#openvino_docs_IE_DG_lpt_ReluTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::ReluTransformation class represents the `Relu` operation transformation.
|
@ -0,0 +1,57 @@
|
|||||||
|
# AddTransformation transformation {#openvino_docs_IE_DG_lpt_AddTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::AddTransformation class represents the `Add` operation transformation.
|
||||||
|
|
||||||
|
The transformation propagates dequantization subtraction from one input branch to the other and propagates dequantization multiplication from the same branch through the `Add` operation. As a result of the transformation, one `Add` operation input branch is in low precision without dequantization operations (the empty branch), while the other input branch is in the original precision with updated dequantization operations (the full branch).
|
||||||
|
|
||||||
|
Criteria for selecting the empty branch, in order of priority:
|
||||||
|
|
||||||
|
*Step 1.* If only one branch is quantized, then the quantized branch becomes the empty branch.
|
||||||
|
|
||||||
|
*Step 2.* If only one branch has `FakeQuantize` before the dequantization operations, then the other branch becomes the empty branch.
|
||||||
|
|
||||||
|
*Step 3.* If one `FakeQuantize` has more than one consumer and the other has only one, then the branch whose `FakeQuantize` has several consumers becomes the empty branch.
|
||||||
|
|
||||||
|
*Step 4.* If one branch is constant, the constant branch stays in the original precision and the data branch becomes the empty branch. In this case, dequantization operations are propagated to the constant branch and will be fused into one constant.
|
||||||
|
|
||||||
|
*Step 5.* If both branches have operations from the following list before `FakeQuantize`: `Convolution`, `GroupConvolution`, and `MatMul`, or neither branch has any operation from the list, then the branch with the larger shape volume becomes the empty branch.
|
||||||
|
|
||||||
|
*Step 6.* If in either branch the operation before `FakeQuantize` has several consumers, then that branch becomes the empty branch.
|
||||||
|
|
||||||
|
If the dequantization operations on the full branch have a `FakeQuantize` operation parent, then they will be fused with the `FakeQuantize` during another low precision transformation. If a `FakeQuantize` operation has a parent operation from the list `Convolution`, `GroupConvolution`, or `MatMul`, then during inference the `FakeQuantize` can be inferred in one plugin kernel together with the parent operation.
|
||||||
|
|
||||||
|
Depending on the plugin instruction set, low precision inference for the `Add` operation can be implemented in two logical steps in one plugin kernel:
|
||||||
|
|
||||||
|
* Inference step #1: operations in the full branch (for example, `Convolution` and `FakeQuantize` with fused dequantization operations) and the `Add` can be inferred in the original precision.
|
||||||
|
|
||||||
|
* Inference step #2: the result of inference step #1 can be added to the empty branch tensor in low precision.
|
||||||
|
|
||||||
|
This approach allows the `Add` operation to be inferred in the optimal way.
|
||||||
|
|
||||||
|
## Subgraph before transformation
|
||||||
|
The subgraph with quantized `Add` operation before transformation:
|
||||||
|
|
||||||
|
\f[
|
||||||
|
y_{ch,i}=(scale1_{ch} * (x1_{ch,i} - shift1_{ch})) + (scale2_{ch} * (x2_{ch,i} - shift2_{ch}))
|
||||||
|
\f]
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
## Subgraph after transformation
|
||||||
|
The subgraph with the `Add` operation after the transformation:
|
||||||
|
|
||||||
|
\f[
|
||||||
|
y_{ch,i}=scale2_{ch} * (scale1_{ch}' * (x1_{ch,i} - shift1_{ch}') + x2_{ch,i})
|
||||||
|
\f]
|
||||||
|
|
||||||
|
where:
|
||||||
|
|
||||||
|
\f[
|
||||||
|
scale1_{ch}' = scale1_{ch} / scale2_{ch}
|
||||||
|
\f]
|
||||||
|
|
||||||
|
\f[
|
||||||
|
shift1_{ch}' = shift1_{ch} + scale2_{ch} * shift2_{ch} / scale1_{ch}
|
||||||
|
\f]
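Substituting these definitions back into the transformed expression confirms that the subgraphs before and after the transformation compute the same result:

\f[
scale2_{ch} * (scale1_{ch}' * (x1_{ch,i} - shift1_{ch}') + x2_{ch,i}) = scale1_{ch} * (x1_{ch,i} - shift1_{ch}) + scale2_{ch} * (x2_{ch,i} - shift2_{ch})
\f]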
|
||||||
|
|
||||||
|

|
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:d8d3621c4be5d3382cb164a19676253412f85b5f47fac27b024c726f1571647e
|
||||||
|
size 380663
|
After Width: | Height: | Size: 54 KiB |
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:ff2d26dc0b86f339458a2fafbbd6a88daf3d3dc6fcefb636243f42a6e91bc328
|
||||||
|
size 492066
|
After Width: | Height: | Size: 51 KiB |
@ -0,0 +1,3 @@
|
|||||||
|
# MultiplyTransformation transformation {#openvino_docs_IE_DG_lpt_MultiplyTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::MultiplyTransformation class represents the `Multiply` operation transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# SubtractTransformation transformation {#openvino_docs_IE_DG_lpt_SubtractTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::SubtractTransformation class represents the `Subtract` operation transformation.
|
@ -0,0 +1,34 @@
|
|||||||
|
# ConvolutionTransformation transformation {#openvino_docs_IE_DG_lpt_ConvolutionTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::ConvolutionTransformation class represents the `Convolution` operation transformation.
|
||||||
|
|
||||||
|
The transformation propagates dequantization operations on activations and weights through the `Convolution` operation. The transformation supports several weights quantization approaches:
|
||||||
|
* quantized weights in low precision with dequantization operations,
|
||||||
|
* weights in original precision with `FakeQuantize` operation.
|
||||||
|
|
||||||
|
The resulting dequantization `Multiply` constant value *result* is calculated as the product of the dequantization `Multiply` constant value on activations *a* and the dequantization `Multiply` constant value on weights *b*:
|
||||||
|
|
||||||
|
\f[
|
||||||
|
result_{i} = a_{i} \cdot b_{i}
|
||||||
|
\f]
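For example (hypothetical values that respect the per-tensor limitation below): if the dequantization `Multiply` constant on activations is a scalar *a* = 0.1 and the per-channel dequantization `Multiply` constants on weights are *b* = {0.02, 0.04}, then:

\f[
result = \{0.1 * 0.02, 0.1 * 0.04\} = \{0.002, 0.004\}
\f]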
|
||||||
|
|
||||||
|
## Limitations
|
||||||
|
|
||||||
|
* Dequantization on activations must be per-tensor. This means that the dequantization `Multiply` constant value on activations must be a scalar.
|
||||||
|
|
||||||
|
## Subgraph before transformation
|
||||||
|
|
||||||
|
### Quantized weights in low precision with dequantization operations
|
||||||
|
The subgraph with a quantized `Convolution` before the transformation, with quantized weights stored as a low precision constant plus dequantization operations:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### Weights in original precision with FakeQuantize operation
|
||||||
|
The subgraph with a quantized `Convolution` before the transformation, with weights in the original precision and a `FakeQuantize` operation:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
## Subgraph after transformation
|
||||||
|
The subgraph with `Convolution` operation after the transformation:
|
||||||
|
|
||||||
|

|
@ -0,0 +1,3 @@
|
|||||||
|
# ConvolutionBackpropDataTransformation transformation {#openvino_docs_IE_DG_lpt_ConvolutionBackpropDataTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::ConvolutionBackpropDataTransformation class represents the `ConvolutionBackpropData` operation transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# GroupConvolutionTransformation transformation {#openvino_docs_IE_DG_lpt_GroupConvolutionTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::GroupConvolutionTransformation class represents the `GroupConvolution` operation transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:9e5bfd5ca52ea6660e0ff67afefc98d64941eab6e8b464116242a6e044f318f5
|
||||||
|
size 207602
|
After Width: | Height: | Size: 26 KiB |
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:756c225ee8e1da046e0210bf0696185b3939378f10b4ed6d757e43070d379436
|
||||||
|
size 135804
|
After Width: | Height: | Size: 15 KiB |
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:08d4116490ab329636fced24c292636fbe00856976b19e5219e433bc2c6e4e16
|
||||||
|
size 190590
|
After Width: | Height: | Size: 28 KiB |
@ -0,0 +1,3 @@
|
|||||||
|
# InterpolateTransformation transformation {#openvino_docs_IE_DG_lpt_InterpolateTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::InterpolateTransformation class represents the `Interpolate` operation transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# MatMulTransformation transformation {#openvino_docs_IE_DG_lpt_MatMulTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::MatMulTransformation class represents the `MatMul` operation transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# ConcatTransformation transformation {#openvino_docs_IE_DG_lpt_ConcatTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::ConcatTransformation class represents the `Concat` operation transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# DepthToSpaceTransformation transformation {#openvino_docs_IE_DG_lpt_DepthToSpaceTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::DepthToSpaceTransformation class represents the `DepthToSpace` operation transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# PadTransformation transformation {#openvino_docs_IE_DG_lpt_PadTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::PadTransformation class represents the `Pad` operation transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# ShuffleChannelsTransformation transformation {#openvino_docs_IE_DG_lpt_ShuffleChannelsTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::ShuffleChannelsTransformation class represents the `ShuffleChannels` operation transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# SplitTransformation transformation {#openvino_docs_IE_DG_lpt_SplitTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::SplitTransformation class represents the `Split` operation transformation.
|
@ -0,0 +1,3 @@
|
|||||||
|
# StridedSliceTransformation transformation {#openvino_docs_IE_DG_lpt_StridedSliceTransformation}
|
||||||
|
|
||||||
|
ngraph::pass::low_precision::StridedSliceTransformation class represents the `StridedSlice` operation transformation.
|