Merge remote-tracking branch 'upstream/master' into debian-packages

This commit is contained in:
Ilya Lavrenov 2022-01-11 18:29:46 +03:00
commit c19ecf16a8
598 changed files with 27481 additions and 15410 deletions

View File

@ -113,7 +113,7 @@ jobs:
# For opencv-python: python3-setuptools and pip upgrade
python3 -m pip install --upgrade pip
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/wheel/requirements-dev.txt
python3 -m pip install -r $(REPO_DIR)/src/bindings/python/wheel/requirements-dev.txt
# For running Python API tests
python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt
# For running Paddle frontend unit tests
@ -208,7 +208,7 @@ jobs:
- script: |
set -e
mkdir $(INSTALL_DIR)/opencv/
mkdir -p $(INSTALL_DIR)/opencv/
cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake
cp -R $(REPO_DIR)/temp/opencv_4.5.2_ubuntu20/opencv/* $(INSTALL_DIR)/opencv/
workingDirectory: $(BUILD_DIR)

View File

@ -119,7 +119,7 @@ jobs:
- script: |
set -e
mkdir $(INSTALL_DIR)/opencv/
mkdir -p $(INSTALL_DIR)/opencv/
cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake
cp -R $(REPO_DIR)/temp/opencv_4.5.2_osx/opencv/* $(INSTALL_DIR)/opencv/
workingDirectory: $(BUILD_DIR)

View File

@ -54,13 +54,13 @@ jobs:
path: src_diff.diff
- name: Run Flake on wheel
run: python -m flake8 ./ --config=../setup.cfg
working-directory: inference-engine/ie_bridges/python/wheel
working-directory: src/bindings/python/wheel
- name: Create code style diff for wheel
if: failure()
run: |
python -m black -l 160 -S ./
git diff > wheel_diff.diff
working-directory: inference-engine/ie_bridges/python/wheel
working-directory: src/bindings/python/wheel
- uses: actions/upload-artifact@v2
if: failure()
with:

2
.gitmodules vendored
View File

@ -54,7 +54,7 @@
path = cmake/developer_package/ncc_naming_style/ncc
url = https://github.com/nithinn/ncc.git
[submodule "thirdparty/onednn_gpu"]
path = thirdparty/onednn_gpu
path = src/plugins/intel_gpu/thirdparty/onednn_gpu
url = https://github.com/oneapi-src/oneDNN.git
[submodule "tools/pot/thirdparty/open_model_zoo"]
path = thirdparty/open_model_zoo

View File

@ -45,8 +45,7 @@ Jenkinsfile @openvinotoolkit/openvino-admins
# IE GPU:
/src/inference/include/ie/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
/src/inference/include/ie/cldnn/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
/src/inference/include/openvino/runtime/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
/inference-engine/thirdparty/clDNN/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
/src/inference/include/openvino/runtime/intel_gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
/src/plugins/intel_gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
# IE VPU:

View File

@ -276,8 +276,8 @@ if(ENABLE_INTEL_GNA)
GNA_LIB_DIR
libGNA_INCLUDE_DIRS
libGNA_LIBRARIES_BASE_PATH)
set(GNA_VERSION "03.00.00.1455")
set(GNA_HASH "8ac1af18eb32777b00193f4f8c252ee4f8bd64a9069138b4a5aaeebd82ead464")
set(GNA_VERSION "03.00.00.1455.0")
set(GNA_HASH "99891696269d8fa10116c96e6b7bda4362736881f0df8df8b56c751ee18e5820")
set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include)
if(WIN32)

View File

@ -37,7 +37,7 @@ The implementation `CompileNetwork` is fully device-specific.
The function accepts a const shared pointer to `ngraph::Function` object and performs the following steps:
1. Applies ngraph passes using `TransformNetwork` function, which defines plugin-specific conversion pipeline.
1. Applies ngraph passes using `TransformNetwork` function, which defines plugin-specific conversion pipeline. To support low precision inference, the pipeline can include Low Precision Transformations. These transformations are usually hardware specific. You can find how to use and configure Low Precision Transformations in the [Low Precision Transformations](@ref openvino_docs_IE_DG_lpt) guide.
2. Maps the transformed graph to a backend specific graph representation (for example, to MKLDNN graph for Intel CPU).
3. Allocates and fills memory for graph weights, backend specific memory handles and so on.

View File

@ -52,6 +52,7 @@ Detailed guides
* [Build](@ref openvino_docs_ie_plugin_dg_plugin_build) a plugin library using CMake\*
* Plugin and its components [testing](@ref openvino_docs_ie_plugin_dg_plugin_testing)
* [Quantized networks](@ref openvino_docs_ie_plugin_dg_quantized_networks)
* [Low precision transformations](@ref openvino_docs_IE_DG_lpt) guide
* [Writing nGraph transformations](@ref ngraph_transformation) guide
API References

View File

@ -4,7 +4,78 @@
<tab type="usergroup" url="index.html" visibile="yes" title="GUIDE">
<tab type="usergroup" url="index.html" title="Developer Guide for Inference Engine Plugin Library">
<tab type="user" url="@ref plugin" visibile="yes" title="Implement Plugin Functionality"/>
<tab type="user" url="@ref executable_network" visibile="yes" title="Implement Executable Network Functionality"/>
<tab type="user" url="@ref executable_network" visibile="yes" title="Implement Executable Network Functionality">
<tab type="usergroup" title="Low Precision Transformations" url="@ref openvino_docs_IE_DG_lpt">
<tab type="user" title="Attributes" url="@ref openvino_docs_IE_DG_lpt_attributes">
<tab type="user" title="AvgPoolPrecisionPreserved" url="@ref openvino_docs_IE_DG_lpt_AvgPoolPrecisionPreserved"/>
<tab type="user" title="IntervalsAlignment" url="@ref openvino_docs_IE_DG_lpt_IntervalsAlignment"/>
<tab type="user" title="PerTensorQuantization" url="@ref openvino_docs_IE_DG_lpt_PerTensorQuantization"/>
<tab type="user" title="PrecisionPreserved" url="@ref openvino_docs_IE_DG_lpt_PrecisionPreserved"/>
<tab type="user" title="Precisions" url="@ref openvino_docs_IE_DG_lpt_Precisions"/>
<tab type="user" title="QuantizationAlignment" url="@ref openvino_docs_IE_DG_lpt_QuantizationAlignment"/>
</tab>
<tab type="user" title="Step 1. Prerequisites transformations" url="@ref openvino_docs_IE_DG_lpt_step1_prerequisites">
<tab type="user" title="LinOpSequenceFusion" url="@ref openvino_docs_IE_DG_lpt_LinOpSequenceFusion"/>
<tab type="user" title="PullReshapeThroughDequantization" url="@ref openvino_docs_IE_DG_lpt_PullReshapeThroughDequantization"/>
<tab type="user" title="PullTransposeThroughDequantization" url="@ref openvino_docs_IE_DG_lpt_PullTransposeThroughDequantization"/>
</tab>
<tab type="user" title="Step 2. Markup transformations" url="@ref openvino_docs_IE_DG_lpt_step2_markup">
<tab type="user" title="AlignQuantizationIntervals" url="@ref openvino_docs_IE_DG_lpt_AlignQuantizationIntervals"/>
<tab type="user" title="AlignQuantizationParameters" url="@ref openvino_docs_IE_DG_lpt_AlignQuantizationParameters"/>
<tab type="user" title="CreateAttribute" url="@ref openvino_docs_IE_DG_lpt_CreateAttribute"/>
<tab type="user" title="CreatePrecisionsDependentAttribute" url="@ref openvino_docs_IE_DG_lpt_CreatePrecisionsDependentAttribute"/>
<tab type="user" title="MarkupAvgPoolPrecisionPreserved" url="@ref openvino_docs_IE_DG_lpt_MarkupAvgPoolPrecisionPreserved"/>
<tab type="user" title="MarkupCanBeQuantized" url="@ref openvino_docs_IE_DG_lpt_MarkupCanBeQuantized"/>
<tab type="user" title="MarkupPerTensorQuantization" url="@ref openvino_docs_IE_DG_lpt_MarkupPerTensorQuantization"/>
<tab type="user" title="MarkupPrecisions" url="@ref openvino_docs_IE_DG_lpt_MarkupPrecisions"/>
<tab type="user" title="PropagatePrecisions" url="@ref openvino_docs_IE_DG_lpt_PropagatePrecisions"/>
<tab type="user" title="PropagateThroughPrecisionPreserved" url="@ref openvino_docs_IE_DG_lpt_PropagateThroughPrecisionPreserved"/>
<tab type="user" title="PropagateToInput" url="@ref openvino_docs_IE_DG_lpt_PropagateToInput"/>
<tab type="user" title="UpdateSharedPrecisionPreserved" url="@ref openvino_docs_IE_DG_lpt_UpdateSharedPrecisionPreserved"/>
</tab>
<tab type="user" title="Step 3. Main transformations" url="@ref openvino_docs_IE_DG_lpt_step3_main">
<tab type="user" title="AddTransformation" url="@ref openvino_docs_IE_DG_lpt_AddTransformation"/>
<tab type="user" title="AvgPoolTransformation" url="@ref openvino_docs_IE_DG_lpt_AvgPoolTransformation"/>
<tab type="user" title="ClampTransformation" url="@ref openvino_docs_IE_DG_lpt_ClampTransformation"/>
<tab type="user" title="ConcatTransformation" url="@ref openvino_docs_IE_DG_lpt_ConcatTransformation"/>
<tab type="user" title="ConvolutionTransformation" url="@ref openvino_docs_IE_DG_lpt_ConvolutionTransformation"/>
<tab type="user" title="ConvolutionBackpropDataTransformation" url="@ref openvino_docs_IE_DG_lpt_ConvolutionBackpropDataTransformation"/>
<tab type="user" title="DepthToSpaceTransformation" url="@ref openvino_docs_IE_DG_lpt_DepthToSpaceTransformation"/>
<tab type="user" title="FakeQuantizeDecompositionTransformation" url="@ref openvino_docs_IE_DG_lpt_FakeQuantizeDecompositionTransformation"/>
<tab type="user" title="FakeQuantizeTransformation" url="@ref openvino_docs_IE_DG_lpt_FakeQuantizeTransformation"/>
<tab type="user" title="InterpolateTransformation" url="@ref openvino_docs_IE_DG_lpt_InterpolateTransformation"/>
<tab type="user" title="GroupConvolutionTransformation" url="@ref openvino_docs_IE_DG_lpt_GroupConvolutionTransformation"/>
<tab type="user" title="MatMulTransformation" url="@ref openvino_docs_IE_DG_lpt_MatMulTransformation"/>
<tab type="user" title="MaxPoolTransformation" url="@ref openvino_docs_IE_DG_lpt_MaxPoolTransformation"/>
<tab type="user" title="MultiplyTransformation" url="@ref openvino_docs_IE_DG_lpt_MultiplyTransformation"/>
<tab type="user" title="MVNTransformation" url="@ref openvino_docs_IE_DG_lpt_MVNTransformation"/>
<tab type="user" title="NormalizeL2Transformation" url="@ref openvino_docs_IE_DG_lpt_NormalizeL2Transformation"/>
<tab type="user" title="PadTransformation" url="@ref openvino_docs_IE_DG_lpt_PadTransformation"/>
<tab type="user" title="PReluTransformation" url="@ref openvino_docs_IE_DG_lpt_PReluTransformation"/>
<tab type="user" title="ReduceMaxTransformation" url="@ref openvino_docs_IE_DG_lpt_ReduceMaxTransformation"/>
<tab type="user" title="ReduceMeanTransformation" url="@ref openvino_docs_IE_DG_lpt_ReduceMeanTransformation"/>
<tab type="user" title="ReduceMinTransformation" url="@ref openvino_docs_IE_DG_lpt_ReduceMinTransformation"/>
<tab type="user" title="ReduceSumTransformation" url="@ref openvino_docs_IE_DG_lpt_ReduceSumTransformation"/>
<tab type="user" title="ReluTransformation" url="@ref openvino_docs_IE_DG_lpt_ReluTransformation"/>
<tab type="user" title="ReshapeTransformation" url="@ref openvino_docs_IE_DG_lpt_ReshapeTransformation"/>
<tab type="user" title="SqueezeTransformation" url="@ref openvino_docs_IE_DG_lpt_SqueezeTransformation"/>
<tab type="user" title="ShuffleChannelsTransformation" url="@ref openvino_docs_IE_DG_lpt_ShuffleChannelsTransformation"/>
<tab type="user" title="SplitTransformation" url="@ref openvino_docs_IE_DG_lpt_SplitTransformation"/>
<tab type="user" title="StridedSliceTransformation" url="@ref openvino_docs_IE_DG_lpt_StridedSliceTransformation"/>
<tab type="user" title="TransposeTransformation" url="@ref openvino_docs_IE_DG_lpt_TransposeTransformation"/>
<tab type="user" title="UnsqueezeTransformation" url="@ref openvino_docs_IE_DG_lpt_UnsqueezeTransformation"/>
<tab type="user" title="VariadicSplitTransformation" url="@ref openvino_docs_IE_DG_lpt_VariadicSplitTransformation"/>
</tab>
<tab type="user" title="Step 4. Cleanup transformations" url="@ref openvino_docs_IE_DG_lpt_step4_cleanup">
<tab type="user" title="FoldConvertTransformation" url="@ref openvino_docs_IE_DG_lpt_FoldConvertTransformation"/>
<tab type="user" title="FoldFakeQuantizeTransformation" url="@ref openvino_docs_IE_DG_lpt_FoldFakeQuantizeTransformation"/>
<tab type="user" title="FuseConvertTransformation" url="@ref openvino_docs_IE_DG_lpt_FuseConvertTransformation"/>
<tab type="user" title="FuseMultiplyToFakeQuantizeTransformation" url="@ref openvino_docs_IE_DG_lpt_FuseMultiplyToFakeQuantizeTransformation"/>
<tab type="user" title="FuseSubtractToFakeQuantizeTransformation" url="@ref openvino_docs_IE_DG_lpt_FuseSubtractToFakeQuantizeTransformation"/>
<tab type="user" title="MultiplyToGroupConvolutionTransformation" url="@ref openvino_docs_IE_DG_lpt_MultiplyToGroupConvolutionTransformation"/>
</tab>
</tab>
</tab>
<tab type="user" url="@ref infer_request" visibile="yes" title="Implement Synchronous Inference Request"/>
<tab type="user" url="@ref async_infer_request" visibile="yes" title="Implement Asynchronous Inference Request"/>
</tab>

View File

@ -0,0 +1,17 @@
# Plugin Transformation Pipeline {#openvino_docs_IE_DG_plugin_transformation_pipeline}
@sphinxdirective
.. toctree::
:maxdepth: 1
:caption: Executable Network
:hidden:
Low Precision Transformations <openvino_docs_IE_DG_lpt>
@endsphinxdirective
A typical plugin transformation pipeline includes the following steps:
1. Common transformations
2. [Low precision transformations](@ref openvino_docs_IE_DG_lpt)
3. Plugin specific transformations

View File

@ -0,0 +1,11 @@
# AvgPoolPrecisionPreserved attribute {#openvino_docs_IE_DG_lpt_AvgPoolPrecisionPreserved}
ngraph::AvgPoolPrecisionPreservedAttribute class represents the `AvgPoolPrecisionPreserved` attribute.
A utility attribute, which is used only while defining the precision preserved property of the `AvgPool` operation.
| Property name | Values |
|---------------|----------------------------------------------|
| Required | Yes |
| Defined | Operation |
| Properties | value (boolean) |

View File

@ -0,0 +1,11 @@
# IntervalsAlignment attribute {#openvino_docs_IE_DG_lpt_IntervalsAlignment}
ngraph::IntervalsAlignmentAttribute class represents the `IntervalsAlignment` attribute.
The attribute defines a subgraph with the same quantization intervals alignment. `FakeQuantize` operations are included. The attribute is used by quantization operations.
| Property name | Values |
|---------------|----------------------------------------------|
| Required | Yes |
| Defined | Operation |
| Properties | combined interval, minimal interval, minimal levels, preferable precisions |

View File

@ -0,0 +1,11 @@
# PerTensorQuantization attribute {#openvino_docs_IE_DG_lpt_PerTensorQuantization}
ngraph::PerTensorQuantizationAttribute class represents the `PerTensorQuantization` attribute.
The attribute defines if the operation input port requires per-tensor quantization.
| Property name | Values |
|---------------|----------------------------------------------|
| Required | Yes |
| Defined | Operation, input ports |
| Properties | |

View File

@ -0,0 +1,11 @@
# PrecisionPreserved attribute {#openvino_docs_IE_DG_lpt_PrecisionPreserved}
ngraph::PrecisionPreservedAttribute class represents the `PrecisionPreserved` attribute.
The attribute defines a precision preserved operation. If the attribute is absent, then an operation is not precision preserved.
| Property name | Values |
|---------------|----------------------------------------------|
| Required | Yes |
| Defined | Operation |
| Properties | value (boolean) |

View File

@ -0,0 +1,11 @@
# Precisions attribute {#openvino_docs_IE_DG_lpt_Precisions}
ngraph::PrecisionsAttribute class represents the `Precisions` attribute.
The attribute defines precision which is required for input/output port or an operation.
| Property name | Values |
|---------------|----------------------------------------------|
| Required | Yes |
| Defined | Operation, input port, output port |
| Properties | precisions |

View File

@ -0,0 +1,11 @@
# QuantizationAlignment attribute {#openvino_docs_IE_DG_lpt_QuantizationAlignment}
ngraph::QuantizationAlignmentAttribute class represents the `QuantizationAlignment` attribute.
The attribute defines a subgraph with the same quantization alignment. `FakeQuantize` operations are not included. The attribute is used by quantization operations.
| Property name | Values |
|---------------|----------------------------------------------|
| Required | Yes |
| Defined | Operation |
| Properties | value (boolean) |

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3ee64e2c942110b8dbbc7cb3d200ed7061da6a12a55c0f379378e31db9ae2180
size 366513

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 22 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b1d9a68912b2dde17c731ed31b090077e6812a84231544ce3d212c0e02b13dfb
size 204085

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 26 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:79b2fd14f9ff7655e4a5abe7e71748e153a095fe1f5eb07c168f53cb12fbb406
size 216703

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 29 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4d3e9a9eddfdcd50eedb035c500848b982b9317ba23f28809a831bbe66300bec
size 167226

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 28 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ec31aa62c0e1da3caf1531f2d92270f321857aca3044445ec242f33ee224f91b
size 297353

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 38 KiB

View File

@ -0,0 +1,319 @@
# OpenVINO™ Low Precision Transformations {#openvino_docs_IE_DG_lpt}
@sphinxdirective
.. toctree::
:maxdepth: 1
:caption: Low Precision Transformations
:hidden:
Low Precision Transformations <openvino_docs_IE_DG_lpt>
Attributes <openvino_docs_IE_DG_lpt_attributes>
Step 1. Prerequisites transformations <openvino_docs_IE_DG_lpt_step1_prerequisites>
Step 2. Markup transformations <openvino_docs_IE_DG_lpt_step2_markup>
Step 3. Main transformations <openvino_docs_IE_DG_lpt_step3_main>
Step 4. Cleanup transformations <openvino_docs_IE_DG_lpt_step4_cleanup>
@endsphinxdirective
## Introduction
Low precision transformations (known as LPT) are a set of nGraph transformations, which are combined in one library. The library is a mandatory part of OpenVINO to infer quantized models in low precision with the maximum performance on Intel CPU, GPU and ARM platforms. The library includes more than 45 transformations and supports more than 30 operations. Some transformations are mandatory, some of them are optional and developed for a specific device.
The goal of Low Precision Transformations (LPT) is to transform a quantized model from its original precision (FP16 or FP32) to a low precision (INT8: `signed int8` or `unsigned int8`), so that it is prepared for low precision inference in OpenVINO™ plugin. It is achieved by two main principles:
1. `FakeQuantize` operation decomposition to two parts:
- part #1: quantize operation - new `FakeQuantize` operation with output quantization intervals in low precision range (signed int8: [-128, 127] or [-127, 127], unsigned int8: [0, 255] or [0, 256]) and with low precision output (`signed int8` or `unsigned int8`),
- part #2: dequantization operations with low precision input and original precision output.
2. Propagation of the dequantization operation through original model's operations. It is done to avoid dequantization operations before original model operations, thus the quantize operations with low precision output remain before the original model operations.
As a result, operation input tensor precisions will be changed from the original to low precision, and operations can be inferred by the OpenVINO™ plugin in low precision.
For a more detailed description on how to quantize a model, see the [Low precision tools](#low-precision-tools) section below. For more information about model quantization, refer to **Brief History of Lower Precision in Deep Learning** section in [this whitepaper](https://software.intel.com/en-us/articles/lower-numerical-precision-deep-learning-inference-and-training).
## Input model requirements
LPT transformations propagate dequantization operations through the following operations:
* [Add-1](@ref openvino_docs_ops_arithmetic_Add_1)
* [AvgPool-1](@ref openvino_docs_ops_pooling_AvgPool_1)
* [Clamp-1](@ref openvino_docs_ops_activation_Clamp_1)
* [Concat-1](@ref openvino_docs_ops_movement_Concat_1)
* [Convolution-1](@ref openvino_docs_ops_convolution_Convolution_1)
* [ConvolutionBackpropData-1](@ref openvino_docs_ops_convolution_ConvolutionBackpropData_1)
* [DepthToSpace-1](@ref openvino_docs_ops_movement_DepthToSpace_1)
* [FakeQuantize-1](@ref openvino_docs_ops_quantization_FakeQuantize_1)
* [GroupConvolution-1](@ref openvino_docs_ops_convolution_GroupConvolution_1)
* [Interpolate-1](@ref openvino_docs_ops_image_Interpolate_1)
* [Interpolate-4](@ref openvino_docs_ops_image_Interpolate_4)
* [MatMul-1](@ref openvino_docs_ops_matrix_MatMul_1)
* [MaxPool-1](@ref openvino_docs_ops_pooling_MaxPool_1)
* [Multiply-1](@ref openvino_docs_ops_arithmetic_Multiply_1)
* [MVN-1](@ref openvino_docs_ops_normalization_MVN_1)
* [NormalizeL2-1](@ref openvino_docs_ops_normalization_NormalizeL2_1)
* [PRelu-1](@ref openvino_docs_ops_activation_PReLU_1)
* [ReduceMax-1](@ref openvino_docs_ops_reduction_ReduceMax_1)
* [ReduceMean-1](@ref openvino_docs_ops_reduction_ReduceMean_1)
* [ReduceMin-1](@ref openvino_docs_ops_reduction_ReduceMin_1)
* [ReduceSum-1](@ref openvino_docs_ops_reduction_ReduceSum_1)
* [Relu-1](@ref openvino_docs_ops_activation_ReLU_1)
* [Reshape-1](@ref openvino_docs_ops_shape_Reshape_1)
* [Split-1](@ref openvino_docs_ops_movement_Split_1)
* [Squeeze-1](@ref openvino_docs_ops_shape_Squeeze_1)
* [StridedSlice-1](@ref openvino_docs_ops_movement_StridedSlice_1)
* [Transpose-1](@ref openvino_docs_ops_movement_Transpose_1)
* [Unsqueeze-1](@ref openvino_docs_ops_shape_Unsqueeze_1)
* [VariadicSplit-1](@ref openvino_docs_ops_movement_VariadicSplit_1)
If an operation is not supported by LPT, then the dequantization operation will not be propagated, input tensor precisions will not be changed to low precision, and the operation will be executed in the original precision.
For example, if you would like to infer a model with a `Convolution` operation in low precision, then the model can look as shown in the picture below:
![Quantized Convolution](img/model_fq_and_convolution.common.png)
> There are several supported quantization approaches on activations and on weights. All supported approaches are described in [Quantization approaches](#quantization-approaches) section below. In demonstrated model [FakeQuantize operation quantization](#fakequantize-operation) approach is used.
### Low precision tools
There are two tools to quantize a model:
1. [Post-Training Optimization Toolkit](@ref pot_docs_LowPrecisionOptimizationGuide) (POT)
2. [Neural Network Compression Framework](https://github.com/openvinotoolkit/nncf) (NNCF)
Additionally, low precision transformations can handle ONNX quantized models.
## Quantization approaches
LPT transformations support two quantization approaches:
1. `FakeQuantize` operation,
2. Quantize and dequantization operations
Let's explore both approaches in detail on the `Convolution` operation.
### FakeQuantize operation
In this case `FakeQuantize` operation is used on activations and quantized constant on weights. Original input model:
![Original model with FakeQuantize](img/model_fq_and_convolution.common.png)
### Quantize and dequantization operations
In this case `FakeQuantize` operation and `Convert` are used as quantize operation and return quantized low precision tensor. After quantize operation on activations there are `Convert` and dequantization operations to compensate decomposition. Original input model:
![Original model with Q/DQ](img/model_qdq_and_convolution.common.png)
In both cases the result is the same. In the LPT result model you can see that:
1. if necessary, `FakeQuantize` operations on activations were decomposed to two parts:
- new `FakeQuantize` operation with updated output intervals in low precision range and low precision output,
- dequantization operations on activations;
2. if necessary, an existing `FakeQuantize` decomposition can be reworked to get better precision;
3. dequantization operations were propagated through `Convolution`.
LPT result model:
![Result model](img/model_fq_and_convolution.transformed.png)
### Low precision transformations pipeline
The LPT transformation pipeline has several steps. Within one step, each transformation has its own unique pattern matcher, but each operation can be assigned to several transformations.
![Low precision transformations pipeline](img/low_precision_transformation_pipeline.png)
Inside each step, LPT transformations handle the input model operation by operation, applying the matching pattern of each transformation from the step to an operation, and executing the transformation if the pattern is matched. The decomposition transformation decomposes `FakeQuantize` to quantize and dequantization operations. Dequantization operations from the previous transformation result are used for the current one and so on, until the end of the model is reached.
As a result, usually all operations are inferred by the plugin in low precision. If the plugin doesn't support an operation inference in low precision, then the corresponding LPT transformation can be disabled, and input tensor precisions for the operation will not be changed. In this case the operation is inferred in the original precision.
Low precision transformations pipeline includes four steps:
* [Step #1: Prerequisites](@ref openvino_docs_IE_DG_lpt_step1_prerequisites)
* [Step #2: Markup transformations](@ref openvino_docs_IE_DG_lpt_step2_markup)
* [Step #3: Main transformations](@ref openvino_docs_IE_DG_lpt_step3_main)
* [Step #4: Cleanup transformations](@ref openvino_docs_IE_DG_lpt_step4_cleanup)
### Step 1. Prerequisites
This step fuses and propagates some operations in the model to prepare for the next step. It is required for OpenVINO plugins. Transformations:
* [PullReshapeThroughDequantization](@ref openvino_docs_IE_DG_lpt_PullReshapeThroughDequantization)
* [PullTransposeThroughDequantization](@ref openvino_docs_IE_DG_lpt_PullTransposeThroughDequantization)
* [LinOpSequenceFusion](@ref openvino_docs_IE_DG_lpt_LinOpSequenceFusion)
The model on this step is changed. There are more details in developer guide [Prerequisites transformations](@ref openvino_docs_IE_DG_lpt_step1_prerequisites).
### Step 2. Markup
This step creates runtime attributes for operations. These attributes will be used in next step. Transformations:
* [MarkupCanBeQuantized](@ref openvino_docs_IE_DG_lpt_MarkupCanBeQuantized)
* [MarkupPrecisions](@ref openvino_docs_IE_DG_lpt_MarkupPrecisions)
* [MarkupPerTensorQuantization](@ref openvino_docs_IE_DG_lpt_MarkupPerTensorQuantization)
* [MarkupAvgPoolPrecisionPreserved](@ref openvino_docs_IE_DG_lpt_MarkupAvgPoolPrecisionPreserved)
* [PropagatePrecisions](@ref openvino_docs_IE_DG_lpt_PropagatePrecisions)
* [AlignQuantizationIntervals](@ref openvino_docs_IE_DG_lpt_AlignQuantizationIntervals)
* [AlignQuantizationParameters](@ref openvino_docs_IE_DG_lpt_AlignQuantizationParameters)
The model on this step is changed: only new attributes are added to some operations. There are more details in developer guide [Markup transformations](@ref openvino_docs_IE_DG_lpt_step2_markup).
### Step 3. Main transformations, FakeQuantize decomposition and dequantization operations handling
This step has the most transformations. These transformations can be separated in two groups: decomposition transformation and dequantization operations handling. There are more details in developer guide [Main transformations](@ref openvino_docs_IE_DG_lpt_step3_main). Transformations:
* [AddTransformation](@ref openvino_docs_IE_DG_lpt_AddTransformation)
* [AvgPoolTransformation](@ref openvino_docs_IE_DG_lpt_AvgPoolTransformation)
* [ClampTransformation](@ref openvino_docs_IE_DG_lpt_ClampTransformation)
* [ConcatTransformation](@ref openvino_docs_IE_DG_lpt_ConcatTransformation)
* [ConvolutionTransformation](@ref openvino_docs_IE_DG_lpt_ConvolutionTransformation)
* [ConvolutionBackpropDataTransformation](@ref openvino_docs_IE_DG_lpt_ConvolutionBackpropDataTransformation)
* [DepthToSpaceTransformation](@ref openvino_docs_IE_DG_lpt_DepthToSpaceTransformation)
* [FakeQuantizeDecompositionTransformation](@ref openvino_docs_IE_DG_lpt_FakeQuantizeDecompositionTransformation)
* [FakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FakeQuantizeTransformation)
* [InterpolateTransformation](@ref openvino_docs_IE_DG_lpt_InterpolateTransformation)
* [GroupConvolutionTransformation](@ref openvino_docs_IE_DG_lpt_GroupConvolutionTransformation)
* [MatMulTransformation](@ref openvino_docs_IE_DG_lpt_MatMulTransformation)
* [MaxPoolTransformation](@ref openvino_docs_IE_DG_lpt_MaxPoolTransformation)
* [MultiplyTransformation](@ref openvino_docs_IE_DG_lpt_MultiplyTransformation)
* [MVNTransformation](@ref openvino_docs_IE_DG_lpt_MVNTransformation)
* [NormalizeL2Transformation](@ref openvino_docs_IE_DG_lpt_NormalizeL2Transformation)
* [PReluTransformation](@ref openvino_docs_IE_DG_lpt_PReluTransformation)
* [ReduceMaxTransformation](@ref openvino_docs_IE_DG_lpt_ReduceMaxTransformation)
* [ReduceMeanTransformation](@ref openvino_docs_IE_DG_lpt_ReduceMeanTransformation)
* [ReduceMinTransformation](@ref openvino_docs_IE_DG_lpt_ReduceMinTransformation)
* [ReduceSumTransformation](@ref openvino_docs_IE_DG_lpt_ReduceSumTransformation)
* [ReluTransformation](@ref openvino_docs_IE_DG_lpt_ReluTransformation)
* [ReshapeTransformation](@ref openvino_docs_IE_DG_lpt_ReshapeTransformation)
* [SqueezeTransformation](@ref openvino_docs_IE_DG_lpt_SqueezeTransformation)
* [ShuffleChannelsTransformation](@ref openvino_docs_IE_DG_lpt_ShuffleChannelsTransformation)
* [SplitTransformation](@ref openvino_docs_IE_DG_lpt_SplitTransformation)
* [StridedSliceTransformation](@ref openvino_docs_IE_DG_lpt_StridedSliceTransformation)
* [TransposeTransformation](@ref openvino_docs_IE_DG_lpt_TransposeTransformation)
* [UnsqueezeTransformation](@ref openvino_docs_IE_DG_lpt_UnsqueezeTransformation)
* [VariadicSplitTransformation](@ref openvino_docs_IE_DG_lpt_VariadicSplitTransformation)
#### Decomposition transformations
Decomposition transformations decompose the `FakeQuantize` operation to: quantize (`FakeQuantize` with low precision output) and dequantization operations (opposite to quantize, with low precision input and the original precision output). For dequantization operations LPT uses three operations: `Convert`, `Subtract` and `Multiply`. Element-wise operations `Subtract` and `Multiply` have constants on the second branches. If dequantization operations are not handled at the end of LPT pipeline, then they will be fused back to the `FakeQuantize`.
Original `FakeQuantize`:
![FakeQuantize operation before LPT](quantization/img/fq.common.png)
`FakeQuantize` after decomposition to quantization and dequantization operations:
![FakeQuantize operation after LPT](quantization/img/fq.transformed.png)
#### Dequantization operations handling transformations
In this step, LPT transformations fuse dequantization operations or move them through existing model operations as much as possible.
Original `Convolution` operation in FP32 with dequantization operations before:
![Convolution operation before LPT](img/model_fq_and_convolution.common.png)
`Convolution` operation in INT8 after decomposition and dequantization operations handling:
![Convolution operation after LPT](img/model_fq_and_convolution.transformed.png)
### Step 4: Cleanup of the result model
LPT cleanup transformations are the final stage in the LPT pipeline. In this step, LPT transformations clean up the result model to avoid unhandled dequantization operations: they fuse dequantization operations into other model operations if possible (or fuse at least `Convert` operations if not). Transformations:
* [FoldConvertTransformation](@ref openvino_docs_IE_DG_lpt_FoldConvertTransformation)
* [FoldFakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FoldFakeQuantizeTransformation)
* [FuseConvertTransformation](@ref openvino_docs_IE_DG_lpt_FuseConvertTransformation)
* [FuseMultiplyToFakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FuseMultiplyToFakeQuantizeTransformation)
* [FuseSubtractToFakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FuseSubtractToFakeQuantizeTransformation)
* [MultiplyToGroupConvolutionTransformation](@ref openvino_docs_IE_DG_lpt_MultiplyToGroupConvolutionTransformation)
There are more details in developer guide [Cleanup transformations](@ref openvino_docs_IE_DG_lpt_step4_cleanup).
`FakeQuantize` operation with unhandled dequantization operations:
![TODO: FakeQuantize operation with dequantization operations before LPT](quantization/img/fq.transformed.png)
`FakeQuantize` operation with fused dequantization operations:
![TODO: FakeQuantize operation with fused operations after LPT](quantization/img/fq.common.png)
## Low precision transformations in plugin transformation pipeline
A typical transformation pipeline is described below.
### Step 1. Common optimizations
This step is optional for LPT but is typically present in OpenVINO™ plugins. The step doesn't use any LPT transformation. Firstly, the step disables dequantization operations constant folding on constant subgraph on weights to prevent the loss of dequantization info on the next plugin transformations. After that, it optimizes the nGraph function and converts operations to operation set 1. Typically, usage of this step is the simplest way to meet LPT requirements for the input quantized model. If the plugin can guarantee that LPT input requirements are met, then this step can be skipped.
@snippet snippets/lpt_mkldnn_plugin.cpp lpt_common
### Step 2. Low precision transformations execution
This step is mandatory. It configures and runs LPT transformations.
@snippet snippets/lpt_mkldnn_plugin.cpp lpt_execution
### Step 3. Plugin-specific transformations
This step is optional. It modifies the nGraph function to a device-specific operation set.
@snippet snippets/lpt_mkldnn_plugin.cpp lpt_device
## Result model overview
Let's explore quantized [TensorFlow* implementation of ResNet-50](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) model. Use [Model Downloader](@ref omz_tools_downloader) tool to download the `fp16` model from [OpenVINO™ Toolkit - Open Model Zoo repository](https://github.com/openvinotoolkit/open_model_zoo):
```sh
./downloader.py --name resnet-50-tf --precisions FP16-INT8
```
After that you should quantize model by the [Model Quantizer](@ref omz_tools_downloader) tool.
```sh
./quantizer.py --model_dir public/resnet-50-tf --dataset_dir <DATASET_DIR> --precisions=FP16-INT8
```
### Inference
The simplest way to infer the model and collect performance counters is [Benchmark Application](../../../../samples/cpp/benchmark_app/README.md).
```sh
./benchmark_app -m resnet-50-tf.xml -d CPU -niter 1 -api sync -report_type average_counters -report_folder pc_report_dir
```
If you infer the model with the OpenVINO™ CPU plugin and collect performance counters, all operations (except last not quantized SoftMax) are executed in INT8 precision.
### Results analysis
Result model depends on different factors:
* The original model quantization possibility and quantization quality. For some models, some operations cannot be quantized by the POT and NNCF tools. In this case, `FakeQuantize` operations are absent before these operations and they will be inferred in the original precision.
* LPT customization and plugin supported operations. If plugin doesn't support INT8 inference for some operation then corresponding LPT transformation should be disabled and the operation will be inferred in original precision.
Information about layer precision is stored in the performance counters that are
available from the Inference Engine API. For example, the part of performance counters table for quantized [TensorFlow* implementation of ResNet-50](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/resnet-50-tf) model inference on CPU Plugin looks as follows:
| layerName | execStatus | layerType | execType | realTime (ms) | cpuTime (ms) |
| --------------------------------------------------------- | ---------- | ------------ | -------------------- | ------------- | ------------ |
| resnet\_model/batch\_normalization\_15/FusedBatchNorm/Add | EXECUTED | Convolution | jit\_avx512\_1x1\_I8 | 0.377 | 0.377 |
| resnet\_model/conv2d\_16/Conv2D/fq\_input\_0 | NOT\_RUN | FakeQuantize | undef | 0 | 0 |
| resnet\_model/batch\_normalization\_16/FusedBatchNorm/Add | EXECUTED | Convolution | jit\_avx512\_I8 | 0.499 | 0.499 |
| resnet\_model/conv2d\_17/Conv2D/fq\_input\_0 | NOT\_RUN | FakeQuantize | undef | 0 | 0 |
| resnet\_model/batch\_normalization\_17/FusedBatchNorm/Add | EXECUTED | Convolution | jit\_avx512\_1x1\_I8 | 0.399 | 0.399 |
| resnet\_model/add\_4/fq\_input\_0 | NOT\_RUN | FakeQuantize | undef | 0 | 0 |
| resnet\_model/add\_4 | NOT\_RUN | Eltwise | undef | 0 | 0 |
| resnet\_model/add\_5/fq\_input\_1 | NOT\_RUN | FakeQuantize | undef | 0 | 0 |
> The `execStatus` column of the table includes possible values:
> - `EXECUTED` - layer was executed by standalone primitive,
> - `NOT_RUN` - layer was not executed by standalone primitive or was fused with another operation and executed in another layer primitive.
>
> The `execType` column of the table includes inference primitives with specific suffixes. The layers have the following marks:
> * Suffix `I8` for layers that had 8-bit data type input and were computed in 8-bit precision
> * Suffix `FP32` for layers computed in 32-bit precision
As a result, all operations (except the non-quantized `SoftMax` at the end of the model) in the OpenVINO™ CPU plugin are inferred in low precision. Note that the result model contains `FakeQuantize` operations in FP32, but it is the plugin's responsibility to fuse these operations with the previous operations. The OpenVINO™ CPU plugin achieves maximally optimized inference for all operations by fusing an INT8 `Convolution` with FP32 output with a `FakeQuantize` operation with FP32 input and INT8 output. In this case, the OpenVINO™ CPU plugin uses INT8 and FP32 vectorized instructions but reports the usage of one INT8 kernel for inference, which is the most optimized for this case.
## Mixed precision
If the output of an operation in the LPT input model has `fp16` precision, the dequantization computations still occur in `fp32` precision. This approach is used to avoid accuracy loss in `fp16` arithmetic computations. Note that the output of the last dequantization operation has `fp16` precision.
## Customization
Low Precision Transformations are customizable. Built-in customization options:
* operation precision restrictions,
* operation per tensor quantization restrictions,
* update precisions,
* dequantization precision.
### Operation precision restrictions
This option defines the precisions which are allowed for the operation input ports. The option value is passed as an input argument to the `LowPrecision` constructor. For example:
@snippet snippets/lpt_mkldnn_plugin.cpp lpt_supported_precisions
In provided example in result model `Convolution` operation inputs must have specific precisions: `u8` (unsigned int8) precision on input 0 (on activations) and `i8` (signed int8) precision on input 1 (on weights).
### Operation per tensor quantization restrictions
This option defines if operation supports per-tensor quantization only. The option value is passed as input argument for `LowPrecision` constructor. For example:
@snippet snippets/lpt_mkldnn_plugin.cpp per_tensor_quantization
In provided example in result model `Convolution` operations must have per-tensor quantization on input 0 (on activations).
### Update precisions
This option defines whether each LPT transformation updates precision or not. The option value is boolean and is passed as the `updatePrecisions` member of `LayerTransformation::Params`, which is an input argument for the `LowPrecision` constructor. All transformations are affected. If `true`, low precision transformations update precisions to low precision; if `false`, they do not. Typically, this option is used for plugin debugging.
### Typical customization use cases
Plugin-specific customization can be implemented via nGraph transformation callbacks. For example, asymmetric quantization support can be easily customized by using the `LayerTransformation::isAsymmetricQuantization` and `WeightableLayerTransformation::isAsymmetricOnWeights` methods in callbacks:
@snippet snippets/lpt_mkldnn_plugin.cpp asymmetric_quantization

View File

@ -0,0 +1,56 @@
# Attributes {#openvino_docs_IE_DG_lpt_attributes}
@sphinxdirective
.. toctree::
:maxdepth: 1
:caption: Attributes
:hidden:
AvgPoolPrecisionPreserved <openvino_docs_IE_DG_lpt_AvgPoolPrecisionPreserved>
IntervalsAlignment <openvino_docs_IE_DG_lpt_IntervalsAlignment>
PerTensorQuantization <openvino_docs_IE_DG_lpt_PerTensorQuantization>
PrecisionPreserved <openvino_docs_IE_DG_lpt_PrecisionPreserved>
Precisions <openvino_docs_IE_DG_lpt_Precisions>
QuantizationAlignment <openvino_docs_IE_DG_lpt_QuantizationAlignment>
@endsphinxdirective
## Introduction
| Name | Target | Required | Mutable |
|-------------------------------------------------------------------------------------|------------------------|----------|---------|
| [AvgPoolPrecisionPreserved](@ref openvino_docs_IE_DG_lpt_AvgPoolPrecisionPreserved) | Precision | No | Yes |
| [IntervalsAlignment](@ref openvino_docs_IE_DG_lpt_IntervalsAlignment) | Quantization interval | Yes | Yes |
| [PerTensorQuantization](@ref openvino_docs_IE_DG_lpt_PerTensorQuantization) | Precision | Yes | No |
| [PrecisionPreserved](@ref openvino_docs_IE_DG_lpt_PrecisionPreserved) | Precision | Yes | Yes |
| [Precisions](@ref openvino_docs_IE_DG_lpt_Precisions) | Precision | Yes | Yes |
| [QuantizationAlignment](@ref openvino_docs_IE_DG_lpt_QuantizationAlignment) | Quantization alignment | Yes | Yes |
> `Target` attribute group defines attribute usage during model transformation for the best performance:
> - `Precision` - the attribute defines the most optimal output port precision.
> - `Quantization interval` - the attribute defines quantization interval.
> - `Quantization alignment` - the attribute defines quantization alignment: per-channel or per-tensor quantization.
>
> `Required` attribute group defines if attribute usage is required to get an optimal model during transformation:
> - `Yes` - the attribute is used by all OpenVINO plugins for low-precision optimization.
> - `No` - the attribute is used in a specific OpenVINO plugin.
>
> `Mutable` attribute group defines if transformation can update an existing attribute:
> - `Yes` - the attribute can be updated by the next transformations in the pipeline. But attribute update order is still important.
> - `No` - existing attribute can not be updated by the next transformation. Previous handled transformation has optimized a model according to the current value.
`FakeQuantize` decomposition is a mandatory part of low precision transformations. Attributes used during decomposition are mandatory. Optional attributes are required only for certain operations.
Attributes usage by transformations:
| Attribute name | Created by transformations | Used by transformations |
|---------------------------|---------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------|
| PrecisionPreserved | MarkupPrecisions, MarkupAvgPoolPrecisionPreserved | AlignQuantizationIntervals, AlignQuantizationParameters, FakeQuantizeDecompositionTransformation, MarkupAvgPoolPrecisionPreserved |
| AvgPoolPrecisionPreserved | MarkupAvgPoolPrecisionPreserved | |
| Precisions | MarkupCanBeQuantized, MarkupPrecisions | FakeQuantizeDecompositionTransformation |
| PerTensorQuantization | MarkupPerTensorQuantization | |
| IntervalsAlignment | AlignQuantizationIntervals | FakeQuantizeDecompositionTransformation |
| QuantizationAlignment | AlignQuantizationParameters | FakeQuantizeDecompositionTransformation |
> **Note:** the same type of attribute instances can be created in different transformations. This approach is the result of the transformation single-responsibility principle. For example, `Precisions` attribute instances are created in `MarkupCanBeQuantized` and `MarkupPrecisions` transformations, but the reasons for their creation are different.

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3a79d152dae50fd3afaa78d8e18de7d279bb1c79b3e4d5c68fffed52a7c51b18
size 383875

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 61 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d54234622f538249dd5ccb5156cc10dd9b5bb40e800f6d1d906a0ff44ecabcf4
size 388893

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 62 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3132bad01388adf7f788592538194bceb6b94f76f1c3788ffb73b76b19a74990
size 393300

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 62 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4f5a98e0ae8dc1f21dd0458ad9ed61de68b134e1128279c3e8b4e700ff3648f8
size 398967

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 64 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2618a80fd1be4d25dfc1f7e57e046a7844c9933a6fed316a0660c3051325557e
size 474998

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 67 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3b7750b3424540912ec590aa5b56cba9e4f2f9db6d45c23aed1d78d094321230
size 488940

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 78 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7836c25a0db5a5f08adf5539fb5ee29f52bc7923148dc42f4c78d3354b7b8464
size 520539

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 77 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:911d9730e6762a9919fe3a48f0c87a44a5aeac97468f2d28c5174c13c69ad74b
size 351583

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 58 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:06caa4dc97b00f150395abc230bc90822f3bfa4e0bb3b65019f111a5a40e1d1c
size 520155

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 77 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8f19d8f068afa4aa62fc04cfa0d2678e6bfe3f90c164a08f588bff9685854030
size 661189

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 95 KiB

View File

@ -0,0 +1,6 @@
# Step 1. Prerequisites Transformations {#openvino_docs_IE_DG_lpt_step1_prerequisites}
Prerequisites transformations are optional. The transformations prepare a model before running other low precision transformations. The transformations do not operate with dequantization operations or update precisions. Prerequisites transformations include:
* [PullReshapeThroughDequantization](@ref openvino_docs_IE_DG_lpt_PullReshapeThroughDequantization)
* [PullTransposeThroughDequantization](@ref openvino_docs_IE_DG_lpt_PullTransposeThroughDequantization)
* [LinOpSequenceFusion](@ref openvino_docs_IE_DG_lpt_LinOpSequenceFusion)

View File

@ -0,0 +1,140 @@
# Step 2. Markup Transformations {#openvino_docs_IE_DG_lpt_step2_markup}
This step defines the optimal `FakeQuantize` decomposition precisions for the best inference performance via operations markup with runtime attribute instances. Attributes are created for input and output ports and operations. Transformations do not change the operation output port precisions. A model markup low precision logic is decomposed and implemented into the following common markup transformations. The order of transformations is important:
1. [MarkupCanBeQuantized](@ref openvino_docs_IE_DG_lpt_MarkupCanBeQuantized)
2. [MarkupPrecisions](@ref openvino_docs_IE_DG_lpt_MarkupPrecisions)
3. [MarkupPerTensorQuantization](@ref openvino_docs_IE_DG_lpt_MarkupPerTensorQuantization)
4. [MarkupAvgPoolPrecisionPreserved](@ref openvino_docs_IE_DG_lpt_MarkupAvgPoolPrecisionPreserved)
5. [PropagatePrecisions](@ref openvino_docs_IE_DG_lpt_PropagatePrecisions)
6. [AlignQuantizationIntervals](@ref openvino_docs_IE_DG_lpt_AlignQuantizationIntervals)
7. [AlignQuantizationParameters](@ref openvino_docs_IE_DG_lpt_AlignQuantizationParameters)
The table of transformations and used attributes:
| Transformation name | Create attributes | Use attributes |
|---------------------------------|-------------------------------|-------------------------------------------|
| MarkupCanBeQuantized | Precisions | |
| MarkupPrecisions | Precisions,PrecisionPreserved | |
| MarkupPerTensorQuantization | PerTensorQuantization | |
| MarkupAvgPoolPrecisionPreserved | AvgPoolPrecisionPreserved | Precisions, PrecisionPreserved |
| PropagatePrecisions | Precisions | Precisions, PrecisionPreserved |
| AlignQuantizationIntervals | IntervalsAlignment | PrecisionPreserved |
| AlignQuantizationParameters | QuantizationAlignment | PrecisionPreserved, PerTensorQuantization |
> **Note:** the same type of attribute instances can be created in different transformations. This approach is the result of the transformation single-responsibility principle. For example, `Precisions` attribute instances are created in `MarkupCanBeQuantized` and `MarkupPrecisions` transformations, but the reasons for their creation are different.
Common markup transformations can be decomposed into simpler utility markup transformations. The order of Markup utility transformations is not important:
* [CreateAttribute](@ref openvino_docs_IE_DG_lpt_CreateAttribute)
* [CreatePrecisionsDependentAttribute](@ref openvino_docs_IE_DG_lpt_CreatePrecisionsDependentAttribute)
* [PropagateThroughPrecisionPreserved](@ref openvino_docs_IE_DG_lpt_PropagateThroughPrecisionPreserved)
* [PropagateToInput](@ref openvino_docs_IE_DG_lpt_PropagateToInput)
* [UpdateSharedPrecisionPreserved](@ref openvino_docs_IE_DG_lpt_UpdateSharedPrecisionPreserved)
Let's explore all transformations and their relations in detail, using one and the same model:
![](img/step2_markup_original.png)
The original model key features:
* The first `concat1` concatenation operation has not quantized `convolution1` consumer.
* The second `concat2` concatenation operation has quantized `convolution2` consumer with requirements:
- support `unsigned int8` on activations,
- per-tensor quantization.
* Between the `concat2` concatenation operation and `Convolution` there is an `AvgPool` operation, which mathematically should return an `f32` tensor. But the `MarkupAvgPoolPrecisionPreserved` transformation is active. This allows the low precision transformation, that goes after the `AvgPool`, to propagate low precision tensor to the next consumer.
Transformations are run with the following parameters:
@snippet snippets/lpt_mkldnn_plugin.cpp lpt_markup_pipeline
## 1. MarkupCanBeQuantized
The transformation marks operations that cannot be quantized. No attributes are required before the transformation.
Changes in the example model after `MarkupCanBeQuantized` transformation:
* Not quantized `convolution1` operation is marked by the `Precisions` attribute with empty values. This attribute allows the next transformation to ignore not quantized operation.
Result model:
![MarkupCanBeQuantized](img/step2_markup1.png)
Model display features (here and below):
* The attributes added by the current transformation are marked in bold.
* If attributes do not fit into one line, then one line consists of only one attribute.
## 2. MarkupPrecisions
The transformation is required and includes two tasks:
1. Mark operation input ports (create `Precisions` attribute instance) by provided restrictions: input port index and required precisions. Restrictions are provided as an input argument in the `ngraph::pass::low_precision::LowPrecision` constructor.
2. Mark precision preserved operations.
No attributes are required before the transformation. Changes in the example model after `MarkupPrecisions` transformation:
* Both concatenation operations are marked as precision preserved operations. It allows to propagate precision via these operations.
* Quantized `convolution2` operation is marked by the `Precisions` attribute with `u8` precision on activations and `i8` precisions on weights according to the provided restrictions. This attribute instance allows to specify which precisions are required for quantized `Convolution` operation.
Result model:
![MarkupPrecisions result](img/step2_markup2.png)
## 3. MarkupPerTensorQuantization
The transformation is required and marks operations (create `PerTensorQuantization` attribute instance) by provided restrictions: an operation that requires per-tensor quantization. No attributes are required before the transformation.
Changes in the example model after `MarkupPerTensorQuantization` transformation:
* both `Convolution` operations are marked by `PerTensorQuantization`
Result model:
![MarkupPerTensorQuantization result](img/step2_markup3.png)
## 4. MarkupAvgPoolPrecisionPreserved
The transformation is optional. `MarkupAvgPoolPrecisionPreserved` marks `AvgPool` operations as precision preserved or not precision preserved. An `AvgPool` operation is precision preserved if the next operation that is not precision preserved can be inferred in low precision. In other words, `AvgPool` operations become precision preserved operations to speed up model inference. The transformation uses `PrecisionPreserved` attributes created before. The transformation is combined and uses:
* CreatePrecisionsDependentAttribute
* PropagateThroughPrecisionPreserved
* UpdateSharedPrecisionPreserved
Changes in the example model after `MarkupAvgPoolPrecisionPreserved` transformation:
* `AvgPool` operations are marked by `PrecisionPreserved` and `AvgPoolPrecisionPreserved` (not used below).
Result model:
![MarkupAvgPoolPrecisionPreserved](img/step2_markup4.png)
## 5. PropagatePrecisions
The transformation is required. `PropagatePrecisions` is a key transformation in the markup pipeline, which marks `FakeQuantize` output port precisions. The transformation uses `PrecisionPreserved` attribute instances created before. The transformation is combined and uses:
* CreateAttribute
* PropagateThroughPrecisionPreserved
* PropagateToInput
Changes in the example model after `PropagatePrecisions` transformation:
* All precision preserved operations are marked by the `Precisions` attribute instance, which defines the required precision for the operation.
* `FakeQuantize` operation output ports are marked by `Precisions` attribute instances, which define target precision for decomposition. In the sample model, `FakeQuantize` operations have signed intervals, but the `Precisions` attributes are initialized by `u8` (`unsigned int8`) values as a result of the restrictions for `Convolution` operations applied during the transformations.
Result model:
![PropagatePrecisions](img/step2_markup5.png)
> **NOTE**: `AlignQuantizationIntervals` and `AlignQuantizationParameters` transformations are required if the model has quantized concatenation operations.
## 6. AlignQuantizationIntervals
The transformation is required for models with the quantized operation. The transformation marks `FakeQuantize` operation and precision preserved consumers to combine quantization information from different `FakeQuantize` operations for future quantization intervals alignment. The transformation is combined and uses:
* CreateAttribute
* PropagateThroughPrecisionPreserved
Changes in the example model after `AlignQuantizationIntervals` transformation:
* All `FakeQuantize` operations and their precision preserved consumers are marked by the `IntervalsAlignment` attribute instance.
Result model:
![AlignQuantizationIntervals](img/step2_markup6.png)
## 7. AlignQuantizationParameters
The transformation is required for models with quantized concatenation operation. The transformation marks `FakeQuantize` precision preserved consumers to align quantization intervals. The transformation is combined and uses:
* CreateAttribute
* PropagateThroughPrecisionPreserved
* UpdateSharedPrecisionPreserved
Changes in the example model after `AlignQuantizationParameters` transformation:
* All `FakeQuantize` precision preserved consumers are marked by `QuantizationAlignment` attribute instance. `convolution1` input ports are marked by `Precisions` attribute instances with empty precisions collection. As a result, the `convolution1` operation was detected as not quantized, and the `QuantizationAlignment` attribute default value `false` does not change. `convolution2` input ports are marked by `Precisions` attribute instances with not empty precisions collection. `convolution2` operation was detected as quantized with the `PerTensorQuantization` attribute, and the `QuantizationAlignment` attribute default value changed to `true`.
Final model:
![AlignQuantizationParameters](img/step2_markup7.png)

View File

@ -0,0 +1,49 @@
# Step 3. Main Transformations {#openvino_docs_IE_DG_lpt_step3_main}
Main transformations are the majority of low precision transformations. Transformations operate with dequantization operations. Main transformations include:
* [AddTransformation](@ref openvino_docs_IE_DG_lpt_AddTransformation)
* [AvgPoolTransformation](@ref openvino_docs_IE_DG_lpt_AvgPoolTransformation)
* [ClampTransformation](@ref openvino_docs_IE_DG_lpt_ClampTransformation)
* [ConcatTransformation](@ref openvino_docs_IE_DG_lpt_ConcatTransformation)
* [ConvolutionTransformation](@ref openvino_docs_IE_DG_lpt_ConvolutionTransformation)
* [ConvolutionBackpropDataTransformation](@ref openvino_docs_IE_DG_lpt_ConvolutionBackpropDataTransformation)
* [DepthToSpaceTransformation](@ref openvino_docs_IE_DG_lpt_DepthToSpaceTransformation)
* [FakeQuantizeDecompositionTransformation](@ref openvino_docs_IE_DG_lpt_FakeQuantizeDecompositionTransformation)
* [FakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FakeQuantizeTransformation)
* [InterpolateTransformation](@ref openvino_docs_IE_DG_lpt_InterpolateTransformation)
* [GroupConvolutionTransformation](@ref openvino_docs_IE_DG_lpt_GroupConvolutionTransformation)
* [MatMulTransformation](@ref openvino_docs_IE_DG_lpt_MatMulTransformation)
* [MaxPoolTransformation](@ref openvino_docs_IE_DG_lpt_MaxPoolTransformation)
* [MultiplyTransformation](@ref openvino_docs_IE_DG_lpt_MultiplyTransformation)
* [MVNTransformation](@ref openvino_docs_IE_DG_lpt_MVNTransformation)
* [NormalizeL2Transformation](@ref openvino_docs_IE_DG_lpt_NormalizeL2Transformation)
* [PReluTransformation](@ref openvino_docs_IE_DG_lpt_PReluTransformation)
* [ReduceMaxTransformation](@ref openvino_docs_IE_DG_lpt_ReduceMaxTransformation)
* [ReduceMeanTransformation](@ref openvino_docs_IE_DG_lpt_ReduceMeanTransformation)
* [ReduceMinTransformation](@ref openvino_docs_IE_DG_lpt_ReduceMinTransformation)
* [ReduceSumTransformation](@ref openvino_docs_IE_DG_lpt_ReduceSumTransformation)
* [ReluTransformation](@ref openvino_docs_IE_DG_lpt_ReluTransformation)
* [ReshapeTransformation](@ref openvino_docs_IE_DG_lpt_ReshapeTransformation)
* [SqueezeTransformation](@ref openvino_docs_IE_DG_lpt_SqueezeTransformation)
* [ShuffleChannelsTransformation](@ref openvino_docs_IE_DG_lpt_ShuffleChannelsTransformation)
* [SplitTransformation](@ref openvino_docs_IE_DG_lpt_SplitTransformation)
* [StridedSliceTransformation](@ref openvino_docs_IE_DG_lpt_StridedSliceTransformation)
* [TransposeTransformation](@ref openvino_docs_IE_DG_lpt_TransposeTransformation)
* [UnsqueezeTransformation](@ref openvino_docs_IE_DG_lpt_UnsqueezeTransformation)
* [VariadicSplitTransformation](@ref openvino_docs_IE_DG_lpt_VariadicSplitTransformation)
Let's explore some main transformations on the example model. Original model:
![Original model](img/step3_original.png)
Result model after main transformations:
![Transformed model](img/step3_transformed.png)
Changes in the example model after main transformation:
* All `FakeQuantize` operations (`fakeQuantize1`, `fakeQuantize2` and `fakeQuantize3`) were decomposed:
- original `FakeQuantize` operations were replaced with new operations with other output intervals and output port precision,
- dequantization operations.
* Dequantization operations were moved via precision preserved (`concat1` and `concat2`) and quantized (`convolution2`) operations.
> **Note:** the left branch (branch #1) does not require per-tensor quantization. As a result, the `fakeQuantize1` output interval is [0, 255]. But quantized `convolution2` requires per-tensor quantization on the right branch (branch #2). Then the intervals of all connected `FakeQuantize` operations (`fakeQuantize1` and `fakeQuantize2`) are aligned to have per-tensor quantization after the concatenation (`concat2`) operation.

View File

@ -0,0 +1,8 @@
# Step 4. Cleanup Transformations {#openvino_docs_IE_DG_lpt_step4_cleanup}
* [FoldConvertTransformation](@ref openvino_docs_IE_DG_lpt_FoldConvertTransformation)
* [FoldFakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FoldFakeQuantizeTransformation)
* [FuseConvertTransformation](@ref openvino_docs_IE_DG_lpt_FuseConvertTransformation)
* [FuseMultiplyToFakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FuseMultiplyToFakeQuantizeTransformation)
* [FuseSubtractToFakeQuantizeTransformation](@ref openvino_docs_IE_DG_lpt_FuseSubtractToFakeQuantizeTransformation)
* [MultiplyToGroupConvolutionTransformation](@ref openvino_docs_IE_DG_lpt_MultiplyToGroupConvolutionTransformation)

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:288dec05908449cc3fa5e07700fac5cbdff17bb4b4035a4ee83c44cbc6c22c70
size 59664

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 15 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8e345c0b2b5fe365ed298d40d3add4b06a8106096186f68dccb5131c01194e72
size 102546

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 23 KiB

View File

@ -0,0 +1,3 @@
# ConvertSubtractConstant transformation {#openvino_docs_IE_DG_lpt_ConvertSubtractConstant}
ngraph::pass::low_precision::ConvertSubtractConstant class represents the `ConvertSubtractConstant` transformation.

View File

@ -0,0 +1,5 @@
# LinOpSequenceFusion transformation {#openvino_docs_IE_DG_lpt_LinOpSequenceFusion}
ngraph::pass::LinOpSequenceFusion class represents the `LinOpSequenceFusion` transformation.
`LinOpSequenceFusion` is a common nGraph transformation.

View File

@ -0,0 +1,3 @@
# PullReshapeThroughDequantization transformation {#openvino_docs_IE_DG_lpt_PullReshapeThroughDequantization}
ngraph::pass::low_precision::PullReshapeThroughDequantization class represents the `PullReshapeThroughDequantization` transformation.

View File

@ -0,0 +1,3 @@
# PullTransposeThroughDequantization transformation {#openvino_docs_IE_DG_lpt_PullTransposeThroughDequantization}
ngraph::pass::low_precision::PullTransposeThroughDequantization class represents the `PullTransposeThroughDequantization` transformation.

View File

@ -0,0 +1,3 @@
# AlignQuantizationIntervals transformation {#openvino_docs_IE_DG_lpt_AlignQuantizationIntervals}
ngraph::pass::low_precision::AlignQuantizationIntervals class represents the `AlignQuantizationIntervals` transformation.

View File

@ -0,0 +1,3 @@
# AlignQuantizationParameters transformation {#openvino_docs_IE_DG_lpt_AlignQuantizationParameters}
ngraph::pass::low_precision::AlignQuantizationParameters class represents the `AlignQuantizationParameters` transformation.

View File

@ -0,0 +1,3 @@
# CreateAttribute transformation {#openvino_docs_IE_DG_lpt_CreateAttribute}
ngraph::pass::low_precision::CreateAttribute class represents the `CreateAttribute` transformation.

View File

@ -0,0 +1,3 @@
# CreatePrecisionsDependentAttribute transformation {#openvino_docs_IE_DG_lpt_CreatePrecisionsDependentAttribute}
ngraph::pass::low_precision::CreatePrecisionsDependentAttribute class represents the `CreatePrecisionsDependentAttribute` transformation.

View File

@ -0,0 +1,3 @@
# MarkupAvgPoolPrecisionPreserved transformation {#openvino_docs_IE_DG_lpt_MarkupAvgPoolPrecisionPreserved}
ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved class represents the `MarkupAvgPoolPrecisionPreserved` transformation.

View File

@ -0,0 +1,3 @@
# MarkupCanBeQuantized transformation {#openvino_docs_IE_DG_lpt_MarkupCanBeQuantized}
ngraph::pass::low_precision::MarkupCanBeQuantized class represents the `MarkupCanBeQuantized` transformation.

View File

@ -0,0 +1,3 @@
# MarkupPerTensorQuantization transformation {#openvino_docs_IE_DG_lpt_MarkupPerTensorQuantization}
ngraph::pass::low_precision::MarkupPerTensorQuantization class represents the `MarkupPerTensorQuantization` transformation.

View File

@ -0,0 +1,3 @@
# MarkupPrecisions transformation {#openvino_docs_IE_DG_lpt_MarkupPrecisions}
ngraph::pass::low_precision::MarkupPrecisions class represents the `MarkupPrecisions` transformation.

View File

@ -0,0 +1,3 @@
# PropagatePrecisions transformation {#openvino_docs_IE_DG_lpt_PropagatePrecisions}
ngraph::pass::low_precision::PropagatePrecisions class represents the `PropagatePrecisions` transformation.

View File

@ -0,0 +1,3 @@
# PropagateSharedValue transformation {#openvino_docs_IE_DG_lpt_PropagateSharedValue}
ngraph::pass::low_precision::PropagateSharedValue class represents the `PropagateSharedValue` transformation.

View File

@ -0,0 +1,3 @@
# PropagateThroughPrecisionPreserved transformation {#openvino_docs_IE_DG_lpt_PropagateThroughPrecisionPreserved}
ngraph::pass::low_precision::PropagateThroughPrecisionPreserved class represents the `PropagateThroughPrecisionPreserved` transformation.

View File

@ -0,0 +1,3 @@
# PropagateToInput transformation {#openvino_docs_IE_DG_lpt_PropagateToInput}
ngraph::pass::low_precision::PropagateToInput class represents the `PropagateToInput` transformation.

View File

@ -0,0 +1,3 @@
# UpdateSharedPrecisionPreserved transformation {#openvino_docs_IE_DG_lpt_UpdateSharedPrecisionPreserved}
ngraph::pass::low_precision::UpdateSharedPrecisionPreserved class represents the `UpdateSharedPrecisionPreserved` transformation.

View File

@ -0,0 +1,3 @@
# ClampTransformation transformation {#openvino_docs_IE_DG_lpt_ClampTransformation}
ngraph::pass::low_precision::ClampTransformation class represents the `Clamp` operation transformation.

View File

@ -0,0 +1,3 @@
# PReluTransformation transformation {#openvino_docs_IE_DG_lpt_PReluTransformation}
ngraph::pass::low_precision::PReluTransformation class represents the `PRelu` operation transformation.

View File

@ -0,0 +1,3 @@
# ReluTransformation transformation {#openvino_docs_IE_DG_lpt_ReluTransformation}
ngraph::pass::low_precision::ReluTransformation class represents the `Relu` operation transformation.

View File

@ -0,0 +1,57 @@
# AddTransformation transformation {#openvino_docs_IE_DG_lpt_AddTransformation}
ngraph::pass::low_precision::AddTransformation class represents the `Add` operation transformation.
The transformation propagates dequantization subtraction from one input branch to another and propagates dequantization multiplication from the same branch through the `Add` operation. As a result of the transformation, one `Add` operation input branch is in low precision without dequantization operations (empty branch), and the other input branch is in original precision with updated dequantization operations (full branch).
Criteria for selecting an empty branch in order of priority:
*Step 1.* If only one branch is quantized, then the quantized branch is an empty branch.
*Step 2.* If only one branch has `FakeQuantize` before dequantization operations, then another branch is an empty branch.
*Step 3.* If some `FakeQuantize` has more than one consumer and another has only one, then the branch with `FakeQuantize` with several consumers is an empty branch.
*Step 4.* Constant branch is in original precision, data branch is an empty branch. In this case, dequantization operations are propagated to a constant branch and will be fused in one constant.
*Step 5.* If both branches have operations from the following list before `FakeQuantize`: `Convolution`, `GroupConvolution`, and `MatMul`, or do not have any operations from the list, then the branch with larger shape volume is empty.
*Step 6.* If the operation before `FakeQuantize` has several consumers in any branch, then the branch is empty.
If dequantization operations on the full branch have a `FakeQuantize` operation parent, then they will be fused with `FakeQuantize` during another low precision transformation. If a `FakeQuantize` operation has a parent operation from the list: `Convolution`, `GroupConvolution`, and `MatMul`, then during inference the `FakeQuantize` can be inferred in one plugin kernel with the parent operation.
Depending on the plugin instruction set, low precision inference for the `Add` operation can be implemented in two logical steps in one plugin kernel:
* Inference step #1: Operations in the full branch, for example, `Convolution` and `FakeQuantize` with fused dequantization operations, and `Add` can be inferred in the original precision.
* Inference step #2: Inference step #1 result can be added with the empty branch tensor in low precision.
This approach allows the `Add` operation to be inferred in the optimal way.
## Subgraph before transformation
The subgraph with quantized `Add` operation before transformation:
\f[
y_{ch,i}=(scale1_{ch} * (x1_{ch,i} - shift1_{ch})) + (scale2_{ch} * (x2_{ch,i} - shift2_{ch}))
\f]
![Add before](img/add.common.png)
## Subgraph after transformation
The subgraph with the `Add` operation after the transformation:
\f[
y_{ch,i}=scale2_{ch} * (scale1_{ch}' * (x1_{ch,i} - shift1_{ch}') + x2_{ch,i})
\f]
where:
\f[
scale1_{ch}' = scale1_{ch} / scale2_{ch}
\f]
\f[
shift1_{ch}' = shift1_{ch} + scale2_{ch} * shift2_{ch} / scale1_{ch}
\f]
![Add after](img/add.transformed.png)

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d8d3621c4be5d3382cb164a19676253412f85b5f47fac27b024c726f1571647e
size 380663

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 54 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ff2d26dc0b86f339458a2fafbbd6a88daf3d3dc6fcefb636243f42a6e91bc328
size 492066

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 51 KiB

View File

@ -0,0 +1,3 @@
# MultiplyTransformation transformation {#openvino_docs_IE_DG_lpt_MultiplyTransformation}
ngraph::pass::low_precision::MultiplyTransformation class represents the `Multiply` operation transformation.

View File

@ -0,0 +1,3 @@
# SubtractTransformation transformation {#openvino_docs_IE_DG_lpt_SubtractTransformation}
ngraph::pass::low_precision::SubtractTransformation class represents the `Subtract` operation transformation.

View File

@ -0,0 +1,34 @@
# ConvolutionTransformation transformation {#openvino_docs_IE_DG_lpt_ConvolutionTransformation}
ngraph::pass::low_precision::ConvolutionTransformation class represents the `Convolution` operation transformation.
The transformation propagates dequantization operations on activations and weights through the `Convolution` operation. The transformation supports several weights quantization approaches:
* quantized weights in low precision with dequantization operations,
* weights in original precision with `FakeQuantize` operation.
The resulting dequantization `Multiply` constant value *result* is calculated as the product of the dequantization `Multiply` constant value on activations *a* and the dequantization `Multiply` constant value on weights *b*:
\f[
result_{i} = a_{i} \cdot b_{i}
\f]
## Limitations
* Dequantization on activations must be per-tensor. This means that the dequantization `Multiply` constant value on activations must be a scalar.
## Subgraph before transformation
### Quantized weights in low precision with dequantization operations
The subgraph with quantized `Convolution` before transformation with quantized weights in low precision constant and dequantization operations:
![Convolution before](img/fq_and_convolution.common.png)
### Weights in original precision with FakeQuantize operation
The subgraph with quantized `Convolution` before transformation with weights in original precision and `FakeQuantize` operation:
![Convolution before](img/fq_fq_and_convolution.common.png)
## Subgraph after transformation
The subgraph with `Convolution` operation after the transformation:
![Convolution after](img/fq_and_convolution.transformed.png)

View File

@ -0,0 +1,3 @@
# ConvolutionBackpropDataTransformation transformation {#openvino_docs_IE_DG_lpt_ConvolutionBackpropDataTransformation}
ngraph::pass::low_precision::ConvolutionBackpropDataTransformation class represents the `ConvolutionBackpropData` operation transformation.

View File

@ -0,0 +1,3 @@
# GroupConvolutionTransformation transformation {#openvino_docs_IE_DG_lpt_GroupConvolutionTransformation}
ngraph::pass::low_precision::GroupConvolutionTransformation class represents the `GroupConvolution` operation transformation.

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e5bfd5ca52ea6660e0ff67afefc98d64941eab6e8b464116242a6e044f318f5
size 207602

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 26 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:756c225ee8e1da046e0210bf0696185b3939378f10b4ed6d757e43070d379436
size 135804

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 15 KiB

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:08d4116490ab329636fced24c292636fbe00856976b19e5219e433bc2c6e4e16
size 190590

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 28 KiB

View File

@ -0,0 +1,3 @@
# InterpolateTransformation transformation {#openvino_docs_IE_DG_lpt_InterpolateTransformation}
ngraph::pass::low_precision::InterpolateTransformation class represents the `Interpolate` operation transformation.

View File

@ -0,0 +1,3 @@
# MatMulTransformation transformation {#openvino_docs_IE_DG_lpt_MatMulTransformation}
ngraph::pass::low_precision::MatMulTransformation class represents the `MatMul` operation transformation.

View File

@ -0,0 +1,3 @@
# ConcatTransformation transformation {#openvino_docs_IE_DG_lpt_ConcatTransformation}
ngraph::pass::low_precision::ConcatTransformation class represents the `Concat` operation transformation.

View File

@ -0,0 +1,3 @@
# DepthToSpaceTransformation transformation {#openvino_docs_IE_DG_lpt_DepthToSpaceTransformation}
ngraph::pass::low_precision::DepthToSpaceTransformation class represents the `DepthToSpace` operation transformation.

View File

@ -0,0 +1,3 @@
# PadTransformation transformation {#openvino_docs_IE_DG_lpt_PadTransformation}
ngraph::pass::low_precision::PadTransformation class represents the `Pad` operation transformation.

View File

@ -0,0 +1,3 @@
# ShuffleChannelsTransformation transformation {#openvino_docs_IE_DG_lpt_ShuffleChannelsTransformation}
ngraph::pass::low_precision::ShuffleChannelsTransformation class represents the `ShuffleChannels` operation transformation.

View File

@ -0,0 +1,3 @@
# SplitTransformation transformation {#openvino_docs_IE_DG_lpt_SplitTransformation}
ngraph::pass::low_precision::SplitTransformation class represents the `Split` operation transformation.

View File

@ -0,0 +1,3 @@
# StridedSliceTransformation transformation {#openvino_docs_IE_DG_lpt_StridedSliceTransformation}
ngraph::pass::low_precision::StridedSliceTransformation class represents the `StridedSlice` operation transformation.

Some files were not shown because too many files have changed in this diff Show More