Merge remote-tracking branch 'upstream/master'

Steve Yoo 2021-05-18 08:46:00 +09:00
commit e07337d533
723 changed files with 18992 additions and 13364 deletions

View File

@@ -125,6 +125,7 @@ jobs:
     displayName: 'nGraph UT'
     continueOnError: false
+  # python3 $(WORK_DIR)/gtest-parallel/gtest_parallel.py $(BIN_DIR)/InferenceEngineUnitTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=InferenceEngineUnitTests.json --gtest_filter=*smoke* -- --gtest_print_time=1
   - script: $(BIN_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
     displayName: 'IE UT old'
     continueOnError: false
@@ -161,14 +162,6 @@ jobs:
     displayName: 'CPU FuncTests'
     continueOnError: false
-  - script: |
-      export DATA_PATH=$(MODELS_PATH)
-      export MODELS_PATH=$(MODELS_PATH)
-      python3 $(WORK_DIR)/gtest-parallel/gtest-parallel $(BIN_DIR)/MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=*smoke* -- --gtest_print_time=1
-    workingDirectory: $(WORK_DIR)
-    displayName: 'MklDnnFunctionalTests'
-    continueOnError: false
   - script: |
       export DATA_PATH=$(MODELS_PATH)
       export MODELS_PATH=$(MODELS_PATH)

View File

@@ -105,7 +105,7 @@ jobs:
     workingDirectory: $(BUILD_DIR)
     displayName: 'Install'
-  - script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru* --gtest_output=xml:TEST-NGraphUT.xml
+  - script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru*:IE_CPU.exp_* --gtest_output=xml:TEST-NGraphUT.xml
     displayName: 'nGraph UT'
     continueOnError: false
@@ -137,14 +137,6 @@ jobs:
     displayName: 'CPU FuncTests'
     continueOnError: false
-  - script: |
-      export DATA_PATH=$(MODELS_PATH)
-      export MODELS_PATH=$(MODELS_PATH)
-      python3 $(WORK_DIR)/gtest-parallel/gtest-parallel $(BIN_DIR)/MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=*smoke*:-smoke_MobileNet/ModelTransformationsTest.LPT/mobilenet_v2_tf_depthwise_batch1_inPluginDisabled_inTestDisabled_asymmetric* -- --gtest_print_time=1
-    workingDirectory: $(WORK_DIR)
-    displayName: 'MklDnnFunctionalTests'
-    continueOnError: false
   - script: |
       export DATA_PATH=$(MODELS_PATH)
       export MODELS_PATH=$(MODELS_PATH)

View File

@@ -167,17 +167,6 @@ jobs:
     displayName: 'CPU FuncTests - IB'
     continueOnError: false
-  # Add for gtest-parallel, it hangs now (CVS-33386)
-  #python $(WORK_DIR)\gtest-parallel\gtest-parallel $(BIN_DIR)\MklDnnFunctionalTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=MklDnnFunctionalTests.json --gtest_filter=*smoke* -- --gtest_print_time=1
-  - script: |
-      set PATH=$(TEST_ENV_PATH)
-      set DATA_PATH=$(MODELS_PATH)
-      set MODELS_PATH=$(MODELS_PATH)
-      rem "$(IB_TESTCONSOLE)" $(BIN_DIR)\MklDnnFunctionalTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-MklDnnFunctionalTests-IB.xml
-      $(BIN_DIR)\MklDnnFunctionalTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-MklDnnFunctionalTests.xml
-    displayName: 'MklDnnFunctionalTests'
-    continueOnError: false
   - script: |
       set PATH=$(TEST_ENV_PATH)
       set DATA_PATH=$(MODELS_PATH)

View File

@@ -104,6 +104,7 @@ When specifying key values as raw strings (that is, when using Python API), omit
 | Parameter Name | Parameter Values | Default | Description |
 |---------------------|-----------------------------|-----------------|-----------------------------------------------------------|
+| `KEY_CACHE_DIR` | `"<cache_dir>"` | `""` | Specifies a directory where compiled OCL binaries can be cached. First model loading generates the cache, and all subsequent LoadNetwork calls use precompiled kernels which significantly improves load time. If empty - caching is disabled |
 | `KEY_PERF_COUNT` | `YES` / `NO` | `NO` | Collect performance counters during inference |
 | `KEY_CONFIG_FILE` | `"<file1> [<file2> ...]"` | `""` | Load custom layer configuration files |
 | `KEY_DUMP_KERNELS` | `YES` / `NO` | `NO` | Dump the final kernels used for custom layers |
@@ -115,7 +116,7 @@ When specifying key values as raw strings (that is, when using Python API), omit
 | `KEY_CLDNN_SOURCES_DUMPS_DIR` | `"<dump_dir>"` | `""` | Final optimized clDNN OpenCL sources dump output directory |
 | `KEY_GPU_THROUGHPUT_STREAMS` | `KEY_GPU_THROUGHPUT_AUTO`, or positive integer| 1 | Specifies a number of GPU "execution" streams for the throughput mode (upper bound for a number of inference requests that can be executed simultaneously).<br>This option is can be used to decrease GPU stall time by providing more effective load from several streams. Increasing the number of streams usually is more effective for smaller topologies or smaller input sizes. Note that your application should provide enough parallel slack (e.g. running many inference requests) to leverage full GPU bandwidth. Additional streams consume several times more GPU memory, so make sure the system has enough memory available to suit parallel stream execution. Multiple streams might also put additional load on CPU. If CPU load increases, it can be regulated by setting an appropriate `KEY_CLDNN_PLUGIN_THROTTLE` option value (see above). If your target system has relatively weak CPU, keep throttling low. <br>The default value is 1, which implies latency-oriented behavior.<br>`KEY_GPU_THROUGHPUT_AUTO` creates bare minimum of streams to improve the performance; this is the most portable option if you are not sure how many resources your target machine has (and what would be the optimal number of streams). <br> A positive integer value creates the requested number of streams. |
 | `KEY_EXCLUSIVE_ASYNC_REQUESTS` | `YES` / `NO` | `NO` | Forces async requests (also from different executable networks) to execute serially.|
+| `KEY_CLDNN_MAX_NUM_THREADS` | `integer value` | `maximum # of HW threads available in host environment` | Specifies the number of CPU threads that can be used for clDNN engine, e.g, JIT compilation of clDNN kernels or clDNN cpu kernel processing. The default value is set as the number of maximum available threads in host environment to minimize the time for LoadNetwork, where the clDNN kernel build time occupies a large portion. Note that if the specified value is larger than the maximum available # of threads or less than zero, it is set as maximum available # of threads. It can be specified with a smaller number than the available HW threads according to the usage scenario, e.g., when the user wants to assign more CPU threads while clDNN plugin is running. Note that setting this value with lower number will affect not only the network loading time but also the cpu layers of clDNN networks that are optimized with multi-threading. |
 ## Note on Debug Capabilities of the GPU Plugin
 Inference Engine GPU plugin provides possibility to dump the user custom OpenCL&trade; kernels to a file to allow you to properly debug compilation issues in your custom kernels.
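For context, a minimal sketch of how the keys above can be passed as raw strings through the Python API (not part of this diff; the cache directory, model paths and values are illustrative, and the 2021.x `openvino.inference_engine` interface is assumed):

```python
from openvino.inference_engine import IECore

ie = IECore()

# Raw-string keys (no KEY_ prefix), as described in the table above; values are illustrative.
ie.set_config({"CACHE_DIR": "/tmp/cl_cache",          # cache compiled OCL kernels between runs
               "PERF_COUNT": "NO",                    # per-layer performance counters off
               "GPU_THROUGHPUT_STREAMS": "2",         # two execution streams for throughput mode
               "CLDNN_MAX_NUM_THREADS": "4"},         # limit CPU threads used for clDNN kernel builds
              "GPU")

net = ie.read_network(model="model.xml", weights="model.bin")
exec_net = ie.load_network(network=net, device_name="GPU")
```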

View File

@@ -35,7 +35,7 @@ Thus we can define:
 **Note**: During the quantization process the values `input_low`, `input_high`, `output_low`, `output_high` are selected so that to map a floating-point zero exactly to an integer value (zero-point) and vice versa.
 ## Quantization specifics and restrictions
-In general, OpenVINO can represent and execute quantized models from different sources. However, the Post-training Optimization Toolkit (POT)
+In general, OpenVINO can represent and execute quantized models from different sources. However, the Post-training Optimization Tool (POT)
 is considered the default way to get optimized models. Since the POT supports HW-aware quantization it means that specific rules can be implemented in it for
 the particular HW. However, it is reasonable to have compatibility with general-purpose HW such as CPU and GPU and support their quantization schemes.
 Below we define these rules as follows:
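As a rough illustration of the zero-point note above (a generic sketch, not taken from the POT documentation), the quantization range can be nudged so that floating-point zero lands exactly on an integer level:

```python
def quant_params(fmin, fmax, levels=256):
    """Pick scale and zero_point so that float 0.0 maps exactly to an integer level."""
    fmin, fmax = min(fmin, 0.0), max(fmax, 0.0)        # the range must contain zero
    scale = (fmax - fmin) / (levels - 1)
    zero_point = round(-fmin / scale)                  # integer level representing 0.0
    input_low = -zero_point * scale                    # adjusted range ends that are
    input_high = (levels - 1 - zero_point) * scale     # exactly representable
    return scale, zero_point, input_low, input_high

print(quant_params(-1.3, 2.6))   # scale ~= 0.0153, zero_point = 85
```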

View File

@@ -122,10 +122,12 @@ virtualenv -p /usr/bin/python3.6 .env3 --system-site-packages
 virtualenv -p /usr/bin/python3.6 .env3/bin/activate
 ```
 3. Install all dependencies or only the dependencies for a specific framework:
-* To install dependencies for all frameworks except TensorFlow* 2.x:
+* To install dependencies for all frameworks except TensorFlow* 1.x:
 ```shell
 pip3 install -r requirements.txt
 ```
+> **NOTE**: TensorFlow 1.x and 2.x are incompatible. Use separate virtual environments if you want to install multiple TensorFlow versions.
 * To install dependencies only for Caffe:
 ```shell
 pip3 install -r requirements_caffe.txt

View File

@@ -0,0 +1,107 @@
# Convert PyTorch\* RNN-T Model to the Intermediate Representation (IR) {#openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT}
This instruction covers conversion of the RNN-T model from the [MLCommons](https://github.com/mlcommons) repository. Follow
the steps below to export a PyTorch* model to ONNX* before converting it to IR:
**Step 1**. Clone RNN-T PyTorch implementation from MLCommons repository (revision r1.0). Make a shallow clone to pull
only RNN-T model without full repository. If you already have a full repository, skip this and go to **Step 2**:
```bash
git clone -b r1.0 -n https://github.com/mlcommons/inference rnnt_for_openvino --depth 1
cd rnnt_for_openvino
git checkout HEAD speech_recognition/rnnt
```
**Step 2**. If you already have a full clone of MLCommons inference repository, create a folder for
pretrained PyTorch model, where conversion into IR will take place. You will also need to specify the path to
your full clone at **Step 5**. Skip this step if you have a shallow clone.
```bash
mkdir rnnt_for_openvino
cd rnnt_for_openvino
```
**Step 3**. Download pretrained weights for PyTorch implementation from https://zenodo.org/record/3662521#.YG21DugzZaQ.
For UNIX*-like systems you can use wget:
```bash
wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt
```
The link was taken from `setup.sh` in the `speech_recognition/rnnt` subfolder. You will get exactly the same weights as
if you were following the steps from https://github.com/mlcommons/inference/tree/master/speech_recognition/rnnt.
**Step 4**. Install the required Python* packages:
```bash
pip3 install torch toml
```
**Step 5**. Export RNN-T model into ONNX with the script below. Copy the code below into a file named
`export_rnnt_to_onnx.py` and run it in the current directory `rnnt_for_openvino`:
> **NOTE**: If you already have a full clone of MLCommons inference repository, you need to
> specify `mlcommons_inference_path` variable.
```python
import toml
import torch
import sys
def load_and_migrate_checkpoint(ckpt_path):
checkpoint = torch.load(ckpt_path, map_location="cpu")
migrated_state_dict = {}
for key, value in checkpoint['state_dict'].items():
key = key.replace("joint_net", "joint.net")
migrated_state_dict[key] = value
del migrated_state_dict["audio_preprocessor.featurizer.fb"]
del migrated_state_dict["audio_preprocessor.featurizer.window"]
return migrated_state_dict
mlcommons_inference_path = './'  # specify relative path for MLCommons inference
checkpoint_path = 'DistributedDataParallel_1576581068.9962234-epoch-100.pt'
config_toml = 'speech_recognition/rnnt/pytorch/configs/rnnt.toml'
config = toml.load(config_toml)
rnnt_vocab = config['labels']['labels']
sys.path.insert(0, mlcommons_inference_path + 'speech_recognition/rnnt/pytorch')
from model_separable_rnnt import RNNT
model = RNNT(config['rnnt'], len(rnnt_vocab) + 1, feature_config=config['input_eval'])
model.load_state_dict(load_and_migrate_checkpoint(checkpoint_path))
seq_length, batch_size, feature_length = 157, 1, 240
inp = torch.randn([seq_length, batch_size, feature_length])
feature_length = torch.LongTensor([seq_length])
x_padded, x_lens = model.encoder(inp, feature_length)
torch.onnx.export(model.encoder, (inp, feature_length), "rnnt_encoder.onnx", opset_version=12,
input_names=['input.1', '1'], dynamic_axes={'input.1': {0: 'seq_len', 1: 'batch'}})
symbol = torch.LongTensor([[20]])
hidden = torch.randn([2, batch_size, 320]), torch.randn([2, batch_size, 320])
g, hidden = model.prediction.forward(symbol, hidden)
torch.onnx.export(model.prediction, (symbol, hidden), "rnnt_prediction.onnx", opset_version=12,
input_names=['input.1', '1', '2'],
dynamic_axes={'input.1': {0: 'batch'}, '1': {1: 'batch'}, '2': {1: 'batch'}})
f = torch.randn([batch_size, 1, 1024])
model.joint.forward(f, g)
torch.onnx.export(model.joint, (f, g), "rnnt_joint.onnx", opset_version=12,
input_names=['0', '1'], dynamic_axes={'0': {0: 'batch'}, '1': {0: 'batch'}})
```
```bash
python3 export_rnnt_to_onnx.py
```
After completing this step, the files rnnt_encoder.onnx, rnnt_prediction.onnx, and rnnt_joint.onnx will be saved in
the current directory.
**Step 6**. Run the conversion command:
```bash
python3 {path_to_openvino}/mo.py --input_model rnnt_encoder.onnx --input "input.1[157 1 240],1->157"
python3 {path_to_openvino}/mo.py --input_model rnnt_prediction.onnx --input "input.1[1 1],1[2 1 320],2[2 1 320]"
python3 {path_to_openvino}/mo.py --input_model rnnt_joint.onnx --input "0[1 1 1024],1[1 1 320]"
```
Please note that the hardcoded sequence length value of 157 was taken from the MLCommons implementation, but conversion to IR preserves
network [reshapeability](../../../../IE_DG/ShapeInference.md); this means you can change the input shapes manually to any value either during conversion or
at inference time.
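As a sketch of that reshapeability (not part of this guide; the input name comes from the export script above, the 2021.x Python API is assumed, and the names should be verified against the generated IR):

```python
from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="rnnt_encoder.xml", weights="rnnt_encoder.bin")
print({name: info.input_data.shape for name, info in net.input_info.items()})

# The IR above was frozen with seq_len = 157, but the network stays reshapeable:
net.reshape({"input.1": [300, 1, 240]})          # hypothetical longer sequence
exec_net = ie.load_network(network=net, device_name="CPU")
```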

View File

@@ -56,6 +56,7 @@ limitations under the License.
 <tab type="user" title="Convert DLRM ONNX* Model to the Intermediate Representation" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_onnx_specific_Convert_DLRM"/>
 <tab type="usergroup" title="Converting Your PyTorch* Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_PyTorch">
 <tab type="user" title="Convert PyTorch* QuartzNet Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_QuartzNet"/>
+<tab type="user" title="Convert PyTorch* RNN-T Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_RNNT"/>
 <tab type="user" title="Convert PyTorch* YOLACT Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_YOLACT"/>
 <tab type="user" title="Convert PyTorch* F3Net Model" url="@ref openvino_docs_MO_DG_prepare_model_convert_model_pytorch_specific_Convert_F3Net"/>
 </tab>

View File

@@ -13,7 +13,7 @@ OpenVINO™ toolkit components:
 with pre-trained models for a range of different tasks
 * [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) to transform models into
 the Intermediate Representation (IR) format
-* [Post-Training Optimization toolkit](@ref pot_README) to calibrate a model and then execute it in the
+* [Post-training Optimization Tool](@ref pot_README) to calibrate a model and then execute it in the
 INT8 precision
 * [Accuracy Checker](@ref omz_tools_accuracy_checker) to determine the accuracy of a model
 * [Benchmark Tool](@ref openvino_inference_engine_samples_benchmark_app_README) to estimate inference performance on supported devices

View File

@@ -29,11 +29,14 @@ If your neural network model contains layers that are not in the list of known l
 Run the [Accuracy Checker utility](@ref omz_tools_accuracy_checker) either against source topologies or against the output representation to evaluate the accuracy of inference. The Accuracy Checker is also part of the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction), an integrated web-based performance analysis studio.
+Use the [Post-training Optimization Tool](@ref pot_README) to accelerate the inference of a deep learning model by quantizing it to INT8.
 Useful documents for model optimization:
 * [Model Optimizer Developer Guide](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
 * [Intermediate Representation and Opsets](MO_DG/IR_and_opsets.md)
 * [Custom Layers Guide](HOWTO/Custom_Layers_Guide.md)
 * [Accuracy Checker utility](@ref omz_tools_accuracy_checker)
+* [Post-training Optimization Tool](@ref pot_README)
 * [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction)
 * [Model Downloader](@ref omz_tools_downloader) utility
 * [Intel's Pretrained Models (Open Model Zoo)](@ref omz_models_group_intel)
@@ -42,7 +45,7 @@ Useful documents for model optimization:
 ### Running and Tuning Inference
 The other core component of OpenVINO™ is the [Inference Engine](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md), which manages the loading and compiling of the optimized neural network model, runs inference operations on input data, and outputs the results. Inference Engine can execute synchronously or asynchronously, and its plugin architecture manages the appropriate compilations for execution on multiple Intel® devices, including both workhorse CPUs and specialized graphics and video processing platforms (see below, Packaging and Deployment).
-You can use OpenVINO™ Tuning Utilities with the Inference Engine to trial and test inference on your model. The Benchmark utility uses an input model to run iterative tests for throughput or latency measures, and the [Cross Check Utility](../inference-engine/tools/cross_check_tool/README.md) compares performance of differently configured inferences. The [Post-Training Optimization Tool](@ref pot_README) integrates a suite of quantization- and calibration-based tools to further streamline performance.
+You can use OpenVINO™ Tuning Utilities with the Inference Engine to trial and test inference on your model. The Benchmark utility uses an input model to run iterative tests for throughput or latency measures, and the [Cross Check Utility](../inference-engine/tools/cross_check_tool/README.md) compares performance of differently configured inferences.
 For a full browser-based studio integrating these other key tuning utilities, try the [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction).
 ![](img/OV-diagram-step3.png)
@@ -56,7 +59,7 @@ Useful documents for inference tuning:
 * [Inference Engine API References](./api_references.html)
 * [Inference Code Samples](IE_DG/Samples_Overview.md)
 * [Application Demos](@ref omz_demos)
-* [Post-Training Optimization Tool Guide](@ref pot_README)
+* [Low Precision Optimization Guide] (@ref pot_docs_LowPrecisionOptimizationGuide)
 * [Deep Learning Workbench Guide](@ref workbench_docs_Workbench_DG_Introduction)
 * [Intel Media SDK](https://github.com/Intel-Media-SDK/MediaSDK)
 * [DL Streamer Samples](@ref gst_samples_README)
@@ -86,7 +89,7 @@ Intel® Distribution of OpenVINO™ toolkit includes the following components:
 - [Deep Learning Inference Engine](IE_DG/Deep_Learning_Inference_Engine_DevGuide.md): A unified API to allow high performance inference on many hardware types including Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, Intel® Vision Accelerator Design with Intel® Movidius™ vision processing unit (VPU).
 - [Inference Engine Samples](IE_DG/Samples_Overview.md): A set of simple console applications demonstrating how to use the Inference Engine in your applications.
 - [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction): A web-based graphical environment that allows you to easily use various sophisticated OpenVINO™ toolkit components.
-- [Post-Training Optimization tool](@ref pot_README): A tool to calibrate a model and then execute it in the INT8 precision.
+- [Post-training Optimization Tool](@ref pot_README): A tool to calibrate a model and then execute it in the INT8 precision.
 - Additional Tools: A set of tools to work with your models including [Benchmark App](../inference-engine/tools/benchmark_tool/README.md), [Cross Check Tool](../inference-engine/tools/cross_check_tool/README.md), [Compile tool](../inference-engine/tools/compile_tool/README.md).
 - [Open Model Zoo](@ref omz_models_group_intel)
 - [Demos](@ref omz_demos): Console applications that provide robust application templates to help you implement specific deep learning scenarios.

View File

@@ -4,15 +4,15 @@
 **Category**: Convolution
-**Short description**: Computes the gradients of a Convolution operation with respect to the input. Also known as a Deconvolution or a Transposed Convolution.
+**Short description**: Computes 1D, 2D or 3D *ConvolutionBackpropData* operation with respect to the input and kernel tensors. Also known as a Transposed Convolution.
 **Detailed description**:
-ConvolutionBackpropData takes the input tensor, weights tensor and output shape and computes the output tensor of a given shape. The shape of the output can be specified as an input 1D integer tensor explicitly or determined by other attributes implicitly. If output shape is specified as an explicit input, shape of the output exactly matches the specified size and required amount of padding is computed.
+ConvolutionBackpropData takes the input tensor, weights tensor and output shape and computes the output tensor of a given shape. The shape of the output can be specified as an input 1D integer tensor explicitly or determined by other attributes implicitly. If output shape is specified as an explicit input, shape of the output exactly matches the specified size and required amount of padding is computed. More thorough explanation can be found in [Transposed Convolutions](https://arxiv.org/abs/1603.07285).
-ConvolutionBackpropData accepts the same set of attributes as a regular Convolution operation, but they are interpreted in a "backward way", so they are applied to the output of ConvolutionBackpropData, but not to the input. Refer to a regular Convolution operation for detailed description of each attribute.
+ConvolutionBackpropData accepts the same set of attributes as a regular Convolution operation and additionally `output_padding` attribute, but they are interpreted in a "backward way", so they are applied to the output of ConvolutionBackpropData, but not to the input. Refer to a regular [Convolution](Convolution_1.md) operation for detailed description of each Convolution attribute.
-Output shape when specified as an input `output_shape`, specifies only spatial dimensions. No batch or channel dimension should be passed along with H, W or other spatial dimensions. If `output_shape` is omitted, then `pads_begin`, `pads_end` or `auto_pad` are used to determine output spatial shape `[Y_1, Y_2, ..., Y_D]` by input spatial shape `[X_1, X_2, ..., X_D]` in the following way:
+When output shape is specified as an input tensor `output_shape` then it specifies only spatial dimensions. No batch or channel dimension should be passed along with spatial dimensions. If `output_shape` is omitted, then `pads_begin`, `pads_end` or `auto_pad` are used to determine output spatial shape `[O_z, O_y, O_x]` by input spatial shape `[I_z, I_y, I_x]` in the following way:
 ```
 if auto_pads != None:
@@ -24,7 +24,7 @@ Y_i = stride[i] * (X_i - 1) + ((K_i - 1) * dilations[i] + 1) - pads_begin[i] - p
 where `K_i` filter kernel dimension along spatial axis `i`.
-If `output_shape` is specified, `pads_begin` and `pads_end` are ignored, and `auto_pad` defines how to distribute padding amount around the tensor. In this case pads are determined based on the next formulas to correctly align input and output tensors (similar to ONNX definition at https://github.com/onnx/onnx/blob/master/docs/Operators.md#convtranspose):
+If `output_shape` is specified, `pads_begin` and `pads_end` are ignored, and `auto_pad` defines how to distribute padding amount around the tensor. In this case pads are determined based on the next formulas to correctly align input and output tensors:
 ```
 total_padding[i] = stride[i] * (X_i - 1) + ((K_i - 1) * dilations[i] + 1) - output_shape[i] + output_padding[i]
@@ -42,7 +42,7 @@ else:
 * **Description**: *strides* has the same definition as *strides* for a regular Convolution but applied in the backward way, for the output tensor.
 * **Range of values**: positive integers
-* **Type**: int[]
+* **Type**: `int[]`
 * **Default value**: None
 * **Required**: *yes*
@@ -50,7 +50,7 @@ else:
 * **Description**: *pads_begin* has the same definition as *pads_begin* for a regular Convolution but applied in the backward way, for the output tensor. May be omitted specified, in which case pads are calculated automatically.
 * **Range of values**: non-negative integers
-* **Type**: int[]
+* **Type**: `int[]`
 * **Default value**: None
 * **Required**: *yes*
 * **Note**: the attribute is ignored when *auto_pad* attribute is specified.
@@ -59,7 +59,7 @@ else:
 * **Description**: *pads_end* has the same definition as *pads_end* for a regular Convolution but applied in the backward way, for the output tensor. May be omitted, in which case pads are calculated automatically.
 * **Range of values**: non-negative integers
-* **Type**: int[]
+* **Type**: `int[]`
 * **Default value**: None
 * **Required**: *yes*
 * **Note**: the attribute is ignored when *auto_pad* attribute is specified.
@@ -68,7 +68,7 @@ else:
 * **Description**: *dilations* has the same definition as *dilations* for a regular Convolution but applied in the backward way, for the output tensor.
 * **Range of values**: positive integers
-* **Type**: int[]
+* **Type**: `int[]`
 * **Default value**: None
 * **Required**: *yes*
@@ -76,9 +76,10 @@ else:
 * **Description**: *auto_pad* has the same definition as *auto_pad* for a regular Convolution but applied in the backward way, for the output tensor.
 * *explicit*: use explicit padding values from `pads_begin` and `pads_end`.
-* *same_upper (same_lower)* the input is padded to match the output size. In case of odd padding value an extra padding is added at the end (at the beginning).
+* *same_upper* the input is padded to match the output size. In case of odd padding value an extra padding is added at the end.
+* *same_lower* the input is padded to match the output size. In case of odd padding value an extra padding is added at the beginning.
 * *valid* - do not use padding.
-* **Type**: string
+* **Type**: `string`
 * **Default value**: None
 * **Required**: *no*
 * **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is specified.
@@ -87,27 +88,38 @@ else:
 * **Description**: *output_padding* adds additional amount of paddings per each spatial axis in the `output` tensor. It unlocks more elements in the output allowing them to be computed. Elements are added at the higher coordinate indices for the spatial dimensions. Number of elements in *output_padding* list matches the number of spatial dimensions in `data` and `output` tensors.
 * **Range of values**: non-negative integer values
-* **Type**: int[]
+* **Type**: `int[]`
 * **Default value**: all zeros
 * **Required**: *no*
 **Inputs**:
-* **1**: `data` -- input tensor of rank 3 or greater. Layout is `[N, C_INPUT, X1, ..., XD]`. *Required*.
+* **1**: Input tensor of type *T1* and rank 3, 4 or 5. Layout is `[N, C_INPUT, Z, Y, X]` (number of batches, number of input channels, spatial axes Z, Y, X). *Required*.
-* **2**: `filter` -- convolution kernel tensor. Weights have shape `[C_INPUT, C_OUTPUT, K_D, ..., K_1]`. `C_INPUT` is the number of channels in input `data` tensor shape, and `C_OUTPUT` is the number of channels in the `output` tensor. Spatial size of the kernel `[K_D, ..., K_1]` is derived from the shape of this input and aren't specified by any attribute. *Required*.
+* **2**: Convolution kernel tensor of type *T1* and rank 3, 4 or 5. Layout is `[C_INPUT, C_OUTPUT, Z, Y, X]` (number of input channels, number of output channels, spatial axes Z, Y, X). Spatial size of the kernel is derived from the shape of this input and aren't specified by any attribute. *Required*.
-* **3**: `output_shape` is 1D integer tensor that specifies spatial shape of the output. *Optional*. If specified, *padding amount* is deduced from relation of input and output spatial shapes according to formulas in the description. If not specified, *output shape* is calculated based on the `pads_begin` and `pads_end` or completely according to `auto_pad`.
+* **3**: `output_shape` is 1D tensor of type *T2* that specifies spatial shape of the output. If specified, *padding amount* is deduced from relation of input and output spatial shapes according to formulas in the description. If not specified, *output shape* is calculated based on the `pads_begin` and `pads_end` or completely according to `auto_pad`. *Optional*.
+* **Note**: Type of the convolution (1D, 2D or 3D) is derived from the rank of the input tensors and not specified by any attribute:
+* 1D convolution (input tensors rank 3) means that there is only one spatial axis X,
+* 2D convolution (input tensors rank 4) means that there are two spatial axes Y, X,
+* 3D convolution (input tensors rank 5) means that there are three spatial axes Z, Y, X.
 **Outputs**:
-* **1**: `output` -- output tensor of the same rank as input `data` tensor and shape `[N, C_OUTPUT, Y1, ..., YD]`.
+* **1**: Output tensor of type *T1* and rank 3, 4 or 5. Layout is `[N, C_OUTPUT, Z, Y, X]` (number of batches, number of kernel output channels, spatial axes Z, Y, X).
-**Example**
+**Types**:
+* *T1*: any numeric type.
+* *T2*: any integer type.
+**Examples**
+*Example 1: 2D ConvolutionBackpropData*
 ```xml
 <layer id="5" name="upsampling_node" type="ConvolutionBackpropData">
-<data dilations="1,1" pads_begin="1,1" pads_end="1,1" strides="2,2"/>
+<data dilations="1,1" pads_begin="1,1" pads_end="1,1" strides="2,2" output_padding="0,0" auto_pad="explicit"/>
 <input>
 <port id="0">
 <dim>1</dim>
@@ -132,3 +144,66 @@ else:
 </output>
 </layer>
 ```
*Example 2: 2D ConvolutionBackpropData with output_padding*
```xml
<layer id="5" name="upsampling_node" type="ConvolutionBackpropData">
<data dilations="1,1" pads_begin="0,0" pads_end="0,0" strides="3,3" output_padding="2,2" auto_pad="explicit"/>
<input>
<port id="0">
<dim>1</dim>
<dim>20</dim>
<dim>2</dim>
<dim>2</dim>
</port>
<port id="1">
<dim>20</dim>
<dim>10</dim>
<dim>3</dim>
<dim>3</dim>
</port>
</input>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>10</dim>
<dim>8</dim>
<dim>8</dim>
</port>
</output>
</layer>
```
*Example 3: 2D ConvolutionBackpropData with output_shape input*
```xml
<layer id="5" name="upsampling_node" type="ConvolutionBackpropData">
<data dilations="1,1" pads_begin="1,1" pads_end="1,1" strides="1,1" output_padding="0,0" auto_pad="valid"/>
<input>
<port id="0">
<dim>1</dim>
<dim>20</dim>
<dim>224</dim>
<dim>224</dim>
</port>
<port id="1">
<dim>20</dim>
<dim>10</dim>
<dim>3</dim>
<dim>3</dim>
</port>
<port id="2">
<dim>2</dim> <!-- output_shape value is: [450, 450]-->
</port>
</input>
<output>
<port id="0" precision="FP32">
<dim>1</dim>
<dim>10</dim>
<dim>450</dim>
<dim>450</dim>
</port>
</output>
</layer>
```
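As a quick check of the output-shape formula quoted earlier, the numbers of *Example 2* can be plugged in directly (an illustrative sketch, not part of the specification):

```python
def deconv_out(x, stride, kernel, dilation=1, pad_begin=0, pad_end=0, output_padding=0):
    # Y_i = stride[i] * (X_i - 1) + ((K_i - 1) * dilations[i] + 1) - pads_begin[i] - pads_end[i] + output_padding[i]
    return stride * (x - 1) + ((kernel - 1) * dilation + 1) - pad_begin - pad_end + output_padding

# Example 2 above: 2x2 input, 3x3 kernel, strides 3,3, zero pads, output_padding 2,2
print(deconv_out(2, stride=3, kernel=3, output_padding=2))   # 8 -> matches the declared 8x8 output
```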

View File

@@ -22,7 +22,7 @@ the number of batch dimensions. `N` and `M` are numbers of dimensions of `data`
 representing the batches, and *Gather* starts to gather from the `b` dimension. It requires the first `b`
 dimensions in `data` and `indices` tensors to be equal. If `batch_dims` is less than zero, normalized value is used
 `batch_dims = indices.rank + batch_dims`.
-* **Range of values**: `[-min(data.rank, indices.rank); min(data.rank, indices.rank))` and `batch_dims' <= axis'`.
+* **Range of values**: `[-min(data.rank, indices.rank); min(data.rank, indices.rank)]` and `batch_dims' <= axis'`.
 Where `batch_dims'` and `axis'` stand for normalized `batch_dims` and `axis` values.
 * **Type**: *T_AXIS*
 * **Default value**: 0
@@ -136,8 +136,9 @@ output_shape = (2, 3)
 * **1**: `data` tensor of type *T* with arbitrary data. **Required**.
-* **2**: `indices` tensor of type *T_IND* with indices to gather. The values for indices are in the range `[0, data[axis] - 1]`.
+* **2**: `indices` tensor of type *T_IND* with indices to gather. 0D tensor (scalar) for indices is also allowed.
+The values for indices are in the range `[0, data[axis] - 1]`.
 **Required**.
 * **3**: Scalar or 1D tensor `axis` of *T_AXIS* type is a dimension index to gather data from. For example,
 *axis* equal to 1 means that gathering is performed over the first dimension. Negative `axis` means reverse indexing and
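A small NumPy analogy of the indices behaviour described above, including the newly allowed scalar (0D) indices (illustrative only, not the reference implementation):

```python
import numpy as np

data = np.arange(2 * 3 * 4).reshape(2, 3, 4)

# 1D indices along axis=1: the indices' shape replaces that axis in the output
print(np.take(data, [0, 2], axis=1).shape)   # (2, 2, 4)

# 0D (scalar) index along axis=1: the gathered axis disappears, as for a scalar `indices` input
print(np.take(data, 1, axis=1).shape)        # (2, 4)
```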

View File

@@ -26,7 +26,7 @@ Where D is the rank of input tensor `data`. The sum of elements in `split_length
 * **2**: `axis`. Axis along `data` to split. A scalar of type `T2` with value from range `-rank(data) .. rank(data)-1`. Negative values address dimensions from the end.
 **Required.**
-* **3**: `split_lengths`. A list containing the dimension values of each output tensor shape along the split `axis`. A 1D tensor of type `T2`. The number of elements in `split_lengths` determines the number of outputs. The sum of elements in `split_lengths` must match `data.shape[axis]`. In addition `split_lenghts` can contain a single `-1` element, which means, all remaining items along specified `axis` that are not consumed by other parts. **Required.**
+* **3**: `split_lengths`. A list containing the dimension values of each output tensor shape along the split `axis`. A 1D tensor of type `T2`. The number of elements in `split_lengths` determines the number of outputs. The sum of elements in `split_lengths` must match `data.shape[axis]`. In addition `split_lengths` can contain a single `-1` element, which means, all remaining items along specified `axis` that are not consumed by other parts. **Required.**
 **Outputs**
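A short sketch of how a single `-1` entry in `split_lengths` resolves to the remaining size along `axis` (illustrative, using NumPy as a stand-in):

```python
import numpy as np

data = np.zeros((4, 10))
axis, split_lengths = 1, [2, -1, 3]

# Resolve the single -1 entry to whatever remains along `axis`
remainder = data.shape[axis] - sum(l for l in split_lengths if l != -1)
lengths = [remainder if l == -1 else l for l in split_lengths]   # [2, 5, 3]

parts = np.split(data, np.cumsum(lengths)[:-1], axis=axis)
print([p.shape for p in parts])   # [(4, 2), (4, 5), (4, 3)]
```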

View File

@@ -8,7 +8,7 @@
 **Detailed description**:
-*Reshape* layer takes two input tensors: the tensor to be resized and the output tensor shape. The values in the second tensor could be -1, 0 and any positive integer number. The two special values -1 and 0:
+*Reshape* takes two input tensors: `data` to be resized and `shape` of the new output. The values in the `shape` could be `-1`, `0` and any positive integer number. The two special values `-1` and `0`:
 * `0` means "copy the respective dimension *(left aligned)* of the input tensor" if `special_zero` is set to `true`; otherwise it is a normal dimension and is applicable to empty tensors.
 * `-1` means that this dimension is calculated to keep the overall elements count the same as in the input tensor. Not more than one `-1` can be used in a reshape operation.
@@ -18,30 +18,31 @@ If `special_zero` is set to `true` index of `0` cannot be larger than the rank o
 * *special_zero*
-* **Description**: *special_zero* controls how zero values in `shape` are interpreted. If *special_zero* is `false`, then 0 is interpreted as-is which means that output shape will contain a zero dimension at the specified location. Input and output tensors are empty in this case. If *special_zero* is `true`, then all zeros in `shape` implies the copying of corresponding dimensions from `data.shape` into the output shape *(left aligned)*.
+* **Description**: *special_zero* controls how zero values in `shape` are interpreted. If *special_zero* is `false`, then `0` is interpreted as-is which means that output shape will contain a zero dimension at the specified location. Input and output tensors are empty in this case. If *special_zero* is `true`, then all zeros in `shape` implies the copying of corresponding dimensions from `data.shape` into the output shape *(left aligned)*.
 * **Range of values**: `false` or `true`
-* **Type**: boolean
+* **Type**: `boolean`
 * **Default value**: None
 * **Required**: *yes*
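The `0` and `-1` special values are easiest to see in a small sketch (illustrative only; the shapes are made up, this is not the plugin implementation):

```python
from math import prod

def reshape_output_shape(data_shape, shape, special_zero):
    out = list(shape)
    if special_zero:
        # `0` copies the respective (left-aligned) input dimension
        out = [data_shape[i] if d == 0 else d for i, d in enumerate(out)]
    if -1 in out:
        # the single `-1` is chosen to keep the total element count unchanged
        rest = prod(d for d in out if d != -1)
        out[out.index(-1)] = prod(data_shape) // rest
    return out

print(reshape_output_shape((2, 3, 4), (0, -1), special_zero=True))   # [2, 12]
print(reshape_output_shape((2, 3, 4), (4, -1), special_zero=False))  # [4, 6]
```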
 **Inputs**:
-* **1**: `data` -- multidimensional input tensor of type *T*. *Required*.
+* **1**: `data` a tensor of type T and arbitrary shape. **Required**.
-* **2**: `shape` -- 1D tensor of type *T_SHAPE* describing output shape. *Required*.
+* **2**: `shape` 1D tensor of type *T_SHAPE* describing output shape. **Required**.
 **Outputs**:
-* **1**: Output tensor with the same content as a tensor at input `data` but with shape defined by input `shape`.
+* **1**: Output tensor of type *T* with the same content as `data` input tensor but with shape defined by `shape` input tensor.
 **Types**
-* *T*: supported type.
+* *T*: any numeric type.
-* *T_SHAPE*: supported integer type.
+* *T_SHAPE*: any supported integer type.
 **Examples**
+*Example 1: reshape empty tensor*
 ```xml
 <layer ... type="Reshape" ...>
 <data special_zero="false"/>
@@ -65,6 +66,7 @@ If `special_zero` is set to `true` index of `0` cannot be larger than the rank o
 </layer>
 ```
+*Example 2: reshape tensor - preserve first dim, calculate second and fix value for third dim*
 ```xml
 <layer ... type="Reshape" ...>
 <data special_zero="true"/>
@@ -89,6 +91,7 @@ If `special_zero` is set to `true` index of `0` cannot be larger than the rank o
 </layer>
 ```
+*Example 3: reshape tensor - preserve first two dims, fix value for third dim and calculate fourth*
 ```xml
 <layer ... type="Reshape" ...>
 <data special_zero="true"/>
@@ -113,6 +116,7 @@ If `special_zero` is set to `true` index of `0` cannot be larger than the rank o
 </layer>
 ```
+*Example 4: reshape tensor - calculate first dim and preserve second dim*
 ```xml
 <layer ... type="Reshape" ...>
 <data special_zero="true"/>
@@ -135,6 +139,7 @@ If `special_zero` is set to `true` index of `0` cannot be larger than the rank o
 </layer>
 ```
+*Example 5: reshape tensor - preserve first dim and calculate second dim*
 ```xml
 <layer ... type="Reshape" ...>
 <data special_zero="true"/>

View File

@@ -67,7 +67,7 @@ std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const n
 // Example: register CommonOptimizations transformation from transformations library
 passManager.register_pass<ngraph::pass::CommonOptimizations>();
 // Template plugin handles only FP32 networks
-passManager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+passManager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
 // Example: register plugin specific transformation
 passManager.register_pass<ngraph::pass::DecomposeDivideMatcher>();
 passManager.register_pass<ngraph::pass::ReluReluFusionMatcher>();

View File

@@ -122,6 +122,7 @@ if(SPEECH_LIBS_AND_DEMOS)
 install(DIRECTORY ${TEMP}/deployment_tools
                   ${TEMP}/data_processing
         DESTINATION .
+        USE_SOURCE_PERMISSIONS
         COMPONENT speech_demo_files)
 endif()

View File

@@ -38,62 +38,6 @@ if (ENABLE_MYRIAD)
 include(cmake/vpu_dependencies.cmake)
 endif()
-## enable cblas_gemm from OpenBLAS package
-if (ENABLE_MKL_DNN AND GEMM STREQUAL "OPENBLAS")
-if(AARCH64)
-if(DEFINED ENV{THIRDPARTY_SERVER_PATH})
-set(IE_PATH_TO_DEPS "$ENV{THIRDPARTY_SERVER_PATH}")
-elseif(DEFINED THIRDPARTY_SERVER_PATH)
-set(IE_PATH_TO_DEPS "${THIRDPARTY_SERVER_PATH}")
-else()
-message(WARNING "OpenBLAS is not found!")
-endif()
-if(DEFINED IE_PATH_TO_DEPS)
-reset_deps_cache(OpenBLAS_DIR)
-RESOLVE_DEPENDENCY(OpenBLAS
-ARCHIVE_LIN "keembay/openblas_0.3.7_yocto_kmb.tar.xz"
-TARGET_PATH "${TEMP}/openblas_0.3.7_yocto_kmb"
-ENVIRONMENT "OpenBLAS_DIR"
-SHA256 "c75aac901d5297d6d60a4b1f941f0335d8fd7f52e0dff8c445f644e2e45e6fba")
-update_deps_cache(OpenBLAS_DIR "${OpenBLAS}/lib/cmake/openblas" "Path to OpenBLAS package folder")
-find_package(OpenBLAS QUIET)
-if(OpenBLAS_FOUND)
-set(BLAS_FOUND TRUE)
-set(BLAS_INCLUDE_DIRS ${OpenBLAS_INCLUDE_DIRS})
-set(BLAS_LIBRARIES ${OpenBLAS_LIBRARIES})
-endif()
-unset(IE_PATH_TO_DEPS)
-endif()
-endif()
-if(NOT BLAS_LIBRARIES OR NOT BLAS_INCLUDE_DIRS)
-find_package(BLAS REQUIRED)
-if(BLAS_FOUND)
-find_path(BLAS_INCLUDE_DIRS cblas.h)
-else()
-message(ERROR "OpenBLAS not found: install OpenBLAS or set -DBLAS_INCLUDE_DIRS=<path to dir with cblas.h> and -DBLAS_LIBRARIES=<path to libopenblas.so or openblas.lib>")
-endif()
-endif()
-debug_message(STATUS "openblas=" ${BLAS_LIBRARIES})
-endif ()
-## MKL-ML package
-if (GEMM STREQUAL "MKL")
-if(NOT MKLROOT)
-message(FATAL_ERROR "MKLROOT not found: install MKL and set -DMKLROOT=<path_to_MKL>")
-endif()
-set(MKL ${MKLROOT})
-debug_message(STATUS "mkl_ml=" ${MKLROOT})
-endif ()
 ## Intel OMP package
 if (THREADING STREQUAL "OMP")
 reset_deps_cache(OMP)
@@ -145,10 +89,10 @@ if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
 ENVIRONMENT "TBBROOT"
 SHA256 "f1c9b9e2861efdaa01552bd25312ccbc5feeb45551e5f91ae61e29221c5c1479")
 RESOLVE_DEPENDENCY(TBBBIND_2_4
-ARCHIVE_WIN "tbbbind_2_4_static_win.zip"
+ARCHIVE_WIN "tbbbind_2_4_static_win_v2.zip"
 TARGET_PATH "${TEMP}/tbbbind_2_4"
 ENVIRONMENT "TBBBIND_2_4_ROOT"
-SHA256 "1a3a05082cc5ef1a764d635793be347b82c795f0e9ced771515fc3706a4dc4f0")
+SHA256 "90dc165652f6ac2ed3014c71e57f797fcc4b11e1498a468e3d2c85deb2a4186a")
 elseif(ANDROID) # Should be before LINUX due LINUX is detected as well
 RESOLVE_DEPENDENCY(TBB
 ARCHIVE_ANDROID "tbb2020_20200404_android.tgz"
@@ -159,11 +103,13 @@ if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
 RESOLVE_DEPENDENCY(TBB
 ARCHIVE_LIN "tbb2020_20200415_lin_strip.tgz"
 TARGET_PATH "${TEMP}/tbb"
+ENVIRONMENT "TBBROOT"
 SHA256 "95b2f3b0b70c7376a0c7de351a355c2c514b42c4966e77e3e34271a599501008")
 RESOLVE_DEPENDENCY(TBBBIND_2_4
-ARCHIVE_LIN "tbbbind_2_4_static_lin.tgz"
+ARCHIVE_LIN "tbbbind_2_4_static_lin_v2.tgz"
 TARGET_PATH "${TEMP}/tbbbind_2_4"
-SHA256 "888582a94f81821f9894cc089db36d5a6c2e0b6998cfa1fec0c027f28c597ada")
+ENVIRONMENT "TBBBIND_2_4_ROOT"
+SHA256 "6dc926258c6cd3cba0f5c2cc672fd2ad599a1650fe95ab11122e8f361a726cb6")
 elseif(LINUX AND AARCH64)
 RESOLVE_DEPENDENCY(TBB
 ARCHIVE_LIN "keembay/tbb2020_38404_kmb_lic.tgz"

View File

@@ -8,23 +8,6 @@ ie_dependent_option (ENABLE_GNA "GNA support for inference engine" ON "NOT APPLE
 ie_dependent_option (ENABLE_CLDNN_TESTS "Enable clDNN unit tests" OFF "ENABLE_CLDNN" OFF)
-# "MKL-DNN library might use MKL-ML or OpenBLAS for gemm tasks: MKL|OPENBLAS|JIT"
-if (ENABLE_MKL_DNN)
-if(AARCH64)
-set(GEMM_DEFAULT "OPENBLAS")
-else()
-set(GEMM_DEFAULT "JIT")
-endif()
-set(GEMM "${GEMM_DEFAULT}" CACHE STRING "GEMM implementation")
-set_property(CACHE GEMM PROPERTY STRINGS "MKL" "OPENBLAS" "JIT")
-list (APPEND IE_OPTIONS GEMM)
-if (NOT GEMM STREQUAL "MKL" AND
-NOT GEMM STREQUAL "OPENBLAS" AND
-NOT GEMM STREQUAL "JIT")
-message(FATAL_ERROR "GEMM should be set to MKL, OPENBLAS or JIT. Default option is ${GEMM_DEFAULT}")
-endif()
-endif()
 # "MKL-DNN library based on OMP or TBB or Sequential implementation: TBB|OMP|SEQ"
 if(X86 OR ARM OR (MSVC AND (ARM OR AARCH64)) )
 set(THREADING_DEFAULT "SEQ")

View File

@@ -25,9 +25,9 @@ function(set_ie_threading_interface_for TARGET_NAME)
 else()
 find_dependency(TBB COMPONENTS tbb tbbmalloc)
 endif()
-set("TBB_FOUND" ${TBB_FOUND} PARENT_SCOPE)
+set(TBB_FOUND ${TBB_FOUND} PARENT_SCOPE)
-set("TBB_IMPORTED_TARGETS" ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
+set(TBB_IMPORTED_TARGETS ${TBB_IMPORTED_TARGETS} PARENT_SCOPE)
-set("TBB_VERSION" ${TBB_VERSION} PARENT_SCOPE)
+set(TBB_VERSION ${TBB_VERSION} PARENT_SCOPE)
 if (NOT TBB_FOUND)
 ext_message(WARNING "TBB was not found by the configured TBB_DIR/TBBROOT path.\
 SEQ method will be used.")

View File

@ -19,15 +19,44 @@
@PACKAGE_INIT@ @PACKAGE_INIT@
include(CMakeFindDependencyMacro) macro(_ie_find_dependency dep)
set(cmake_fd_quiet_arg)
if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY)
set(cmake_fd_quiet_arg QUIET)
endif()
set(cmake_fd_required_arg)
if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED)
set(cmake_fd_required_arg REQUIRED)
endif()
get_property(cmake_fd_alreadyTransitive GLOBAL PROPERTY
_CMAKE_${dep}_TRANSITIVE_DEPENDENCY)
find_package(${dep} ${ARGN}
${cmake_fd_quiet_arg}
${cmake_fd_required_arg})
if(NOT DEFINED cmake_fd_alreadyTransitive OR cmake_fd_alreadyTransitive)
set_property(GLOBAL PROPERTY _CMAKE_${dep}_TRANSITIVE_DEPENDENCY TRUE)
endif()
if(NOT ${dep}_FOUND)
set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.")
set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False)
return()
endif()
set(cmake_fd_required_arg)
set(cmake_fd_quiet_arg)
endmacro()
# need to store current PACKAGE_PREFIX_DIR, because it's overwritten by ngraph one # need to store current PACKAGE_PREFIX_DIR, because it's overwritten by ngraph one
set(IE_PACKAGE_PREFIX_DIR "${PACKAGE_PREFIX_DIR}") set(IE_PACKAGE_PREFIX_DIR "${PACKAGE_PREFIX_DIR}")
set(THREADING "@THREADING@") set(THREADING "@THREADING@")
if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" AND NOT TBB_FOUND)
set_and_check(_tbb_dir "@PACKAGE_IE_TBB_DIR@") set_and_check(_tbb_dir "@PACKAGE_IE_TBB_DIR@")
find_dependency(TBB _ie_find_dependency(TBB
COMPONENTS tbb tbbmalloc COMPONENTS tbb tbbmalloc
CONFIG CONFIG
PATHS ${TBBROOT}/cmake PATHS ${TBBROOT}/cmake
@ -37,7 +66,7 @@ if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
endif() endif()
set_and_check(_ngraph_dir "@PACKAGE_IE_NGRAPH_DIR@") set_and_check(_ngraph_dir "@PACKAGE_IE_NGRAPH_DIR@")
find_dependency(ngraph _ie_find_dependency(ngraph
CONFIG CONFIG
PATHS ${_ngraph_dir} PATHS ${_ngraph_dir}
NO_CMAKE_FIND_ROOT_PATH NO_CMAKE_FIND_ROOT_PATH

View File

@ -1,2 +1 @@
numpy>=1.16.3 numpy~=1.19.5
cython>=0.29.17

View File

@ -1,2 +1,2 @@
opencv-python>=3.4.4.19 opencv-python==4.5.*
numpy>=1.16.3 numpy~=1.19.5

View File

@ -6,6 +6,7 @@ from ..inference_engine.ie_api cimport IENetwork
from libcpp cimport bool from libcpp cimport bool
from libcpp.string cimport string from libcpp.string cimport string
from libc.stdint cimport int64_t
def ApplyMOCTransformations(IENetwork network, bool cf): def ApplyMOCTransformations(IENetwork network, bool cf):
@ -16,8 +17,8 @@ def ApplyPOTTransformations(IENetwork network, string device):
C.ApplyPOTTransformations(network.impl, device) C.ApplyPOTTransformations(network.impl, device)
def ApplyLowLatencyTransformation(IENetwork network): def ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations=1):
C.ApplyLowLatencyTransformation(network.impl) C.ApplyLowLatencyTransformation(network.impl, num_iterations)
def ApplyPruningTransformation(IENetwork network): def ApplyPruningTransformation(IENetwork network):

View File

@ -26,8 +26,9 @@ void InferenceEnginePython::ApplyPOTTransformations(InferenceEnginePython::IENet
manager.run_passes(network.actual->getFunction()); manager.run_passes(network.actual->getFunction());
} }
void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network) { void InferenceEnginePython::ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations) {
ngraph::pass::Manager manager; ngraph::pass::Manager manager;
// TODO: pass num_iterations to LowLatency
manager.register_pass<ngraph::pass::LowLatency>(); manager.register_pass<ngraph::pass::LowLatency>();
manager.register_pass<ngraph::pass::UnrollTensorIterator>(); manager.register_pass<ngraph::pass::UnrollTensorIterator>();

View File

@ -15,7 +15,7 @@ void ApplyMOCTransformations(InferenceEnginePython::IENetwork network, bool cf);
void ApplyPOTTransformations(InferenceEnginePython::IENetwork network, std::string device); void ApplyPOTTransformations(InferenceEnginePython::IENetwork network, std::string device);
void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network); void ApplyLowLatencyTransformation(InferenceEnginePython::IENetwork network, int64_t num_iterations);
void ApplyPruningTransformation(InferenceEnginePython::IENetwork network); void ApplyPruningTransformation(InferenceEnginePython::IENetwork network);

View File

@ -3,6 +3,7 @@
from libcpp cimport bool from libcpp cimport bool
from libcpp.string cimport string from libcpp.string cimport string
from libc.stdint cimport int64_t
from ..inference_engine.ie_api_impl_defs cimport IENetwork from ..inference_engine.ie_api_impl_defs cimport IENetwork
@ -11,7 +12,7 @@ cdef extern from "offline_transformations_api_impl.hpp" namespace "InferenceEngi
cdef void ApplyPOTTransformations(IENetwork network, string device) cdef void ApplyPOTTransformations(IENetwork network, string device)
cdef void ApplyLowLatencyTransformation(IENetwork network) cdef void ApplyLowLatencyTransformation(IENetwork network, int64_t num_iterations)
cdef void ApplyPruningTransformation(IENetwork network) cdef void ApplyPruningTransformation(IENetwork network)

View File

@ -2,3 +2,4 @@ opencv-python>=3.4.4.19
pytest==4.0.1 pytest==4.0.1
attrs==19.1.0 attrs==19.1.0
pytest-html==1.19.0 pytest-html==1.19.0
cython>=0.29.22

View File

@ -1,28 +1,28 @@
defusedxml>=0.5.0 defusedxml>=0.7.1
scipy==1.5.4 scipy~=1.5.4
jstyleson==0.0.2 jstyleson~=0.0.2
numpy~=1.18.5 numpy~=1.19.5
addict==2.2.1 addict>=2.4.0
pandas~=1.1.5 pandas~=1.1.5
hyperopt==0.1.2 hyperopt~=0.1.2
networkx==2.2 networkx~=2.5
tqdm==4.31.1 tqdm>=4.54.1
texttable==1.6.3 texttable~=1.6.3
py-cpuinfo!=5.0,!=6.0 py-cpuinfo>=7.0.0
PyYAML>=5.4.1 PyYAML>=5.4.1
pillow>=8.1.0 pillow>=8.1.2
scikit-image>=0.17 scikit-image~=0.17.2
scikit-learn>=0.23 scikit-learn>=0.24.1
yamlloader>=0.5 yamlloader>=0.5
shapely>=1.7 shapely>=1.7.1
nibabel>=3.1 nibabel>=3.2.1
pydicom>=2.0 pydicom>=2.1.2
sentencepiece>=0.1.91 sentencepiece>=0.1.95
tokenizers>=0.8 tokenizers>=0.10.1
editdistance>=0.5 editdistance>=0.5.3
parasail>=1.2 parasail>=1.2.4
fast-ctc-decode>=0.2 fast-ctc-decode>=0.2.5
rawpy>=0.15 rawpy>=0.16.0
nltk>=3.5 nltk>=3.5
opencv-python>=4.4 opencv-python==4.5.*
progress==1.5 progress>=1.5

View File

@ -8,20 +8,10 @@ py_modules =
mo_kaldi mo_kaldi
[options.package_data] [options.package_data]
mo = *.txt * = *
compression.configs.hardware = *.json
mo.extensions.front.mxnet = *.json
mo.extensions.front.onnx = *.json
mo.extensions.front.tf = *.json
mo.mo.front.caffe.proto = *.proto
[options.entry_points] [options.entry_points]
console_scripts = console_scripts =
mo=mo.__main__:main
pot=app.run:main
accuracy_check=accuracy_checker.main:main
convert_annotation=accuracy_checker.annotation_converters.convert:main
benchmark_app=openvino.tools.benchmark.main:main
[metadata] [metadata]
license_files = license_files =

View File

@ -1 +1 @@
numpy>=1.16.3 numpy~=1.19.5

View File

@ -35,7 +35,7 @@ class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
std::shared_ptr<IExecutableNetworkInternal> _impl; std::shared_ptr<IExecutableNetworkInternal> _impl;
std::shared_ptr<details::SharedObjectLoader> _so; std::shared_ptr<details::SharedObjectLoader> _so;
explicit ExecutableNetwork(const std::shared_ptr<IExecutableNetworkInternal>& impl, ExecutableNetwork(const std::shared_ptr<IExecutableNetworkInternal>& impl,
const std::shared_ptr<details::SharedObjectLoader>& so); const std::shared_ptr<details::SharedObjectLoader>& so);
friend class InferencePlugin; friend class InferencePlugin;

View File

@ -36,7 +36,7 @@ class INFERENCE_ENGINE_API_CLASS(InferRequest) {
std::shared_ptr<IInferRequestInternal> _impl; std::shared_ptr<IInferRequestInternal> _impl;
std::shared_ptr<details::SharedObjectLoader> _so; std::shared_ptr<details::SharedObjectLoader> _so;
explicit InferRequest(const std::shared_ptr<IInferRequestInternal>& impl, InferRequest(const std::shared_ptr<IInferRequestInternal>& impl,
const std::shared_ptr<details::SharedObjectLoader>& so); const std::shared_ptr<details::SharedObjectLoader>& so);
friend class ExecutableNetwork; friend class ExecutableNetwork;
@ -191,7 +191,7 @@ public:
*/ */
template<typename F> template<typename F>
void SetCompletionCallback(F callbackToSet) { void SetCompletionCallback(F callbackToSet) {
return SetCallback<F>{*this}(std::move(callbackToSet)); SetCallback<F>{*this}(std::move(callbackToSet));
} }
/** /**

View File

@ -11,9 +11,10 @@
#pragma once #pragma once
#include <string> #include <string>
#include <memory>
#include "ie_api.h"
#include "ie_blob.h" #include "ie_blob.h"
#include "details/ie_so_loader.h"
namespace InferenceEngine { namespace InferenceEngine {
@ -28,15 +29,15 @@ class IVariableStateInternal;
*/ */
class INFERENCE_ENGINE_API_CLASS(VariableState) { class INFERENCE_ENGINE_API_CLASS(VariableState) {
std::shared_ptr<IVariableStateInternal> _impl = nullptr; std::shared_ptr<IVariableStateInternal> _impl = nullptr;
details::SharedObjectLoader::Ptr _so = nullptr; std::shared_ptr<details::SharedObjectLoader> _so = nullptr;
/** /**
* @brief Constructs VariableState from the initialized std::shared_ptr * @brief Constructs VariableState from the initialized std::shared_ptr
* @param impl Initialized shared pointer * @param impl Initialized shared pointer
* @param so Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin object is destroyed. * @param so Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin object is destroyed.
*/ */
explicit VariableState(const std::shared_ptr<IVariableStateInternal>& impl, VariableState(const std::shared_ptr<IVariableStateInternal>& impl,
const details::SharedObjectLoader::Ptr& so = {}); const std::shared_ptr<details::SharedObjectLoader>& so);
friend class InferRequest; friend class InferRequest;
friend class ExecutableNetwork; friend class ExecutableNetwork;

View File

@ -1,50 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief header file for no_copy class
*
* @file ie_no_copy.hpp
*/
#pragma once
namespace InferenceEngine {
namespace details {
/**
* @brief This class is used for objects returned from the shared library factory to prevent copying
*/
class no_copy {
protected:
/**
* @brief A default constructor
*/
no_copy() = default;
/**
* @brief A default destructor
*/
virtual ~no_copy() = default;
/**
* @brief A removed copy constructor
*/
no_copy(no_copy const&) = delete;
/**
* @brief A removed assign operator
*/
no_copy& operator=(no_copy const&) = delete;
/**
* @brief A removed move constructor
*/
no_copy(no_copy&&) = delete;
/**
* @brief A removed move operator
*/
no_copy& operator=(no_copy&&) = delete;
};
} // namespace details
} // namespace InferenceEngine

View File

@ -19,7 +19,6 @@
#include "ie_layouts.h" #include "ie_layouts.h"
#include "ie_blob.h" #include "ie_blob.h"
#include "ie_version.hpp" #include "ie_version.hpp"
#include "details/ie_no_copy.hpp"
/** /**
* @def INFERENCE_EXTENSION_API(TYPE) * @def INFERENCE_EXTENSION_API(TYPE)

View File

@ -14,7 +14,6 @@
#include "ie_blob.h" #include "ie_blob.h"
#include "ie_common.h" #include "ie_common.h"
#include "details/ie_no_copy.hpp"
namespace InferenceEngine { namespace InferenceEngine {
@ -23,7 +22,7 @@ namespace InferenceEngine {
* @interface IVariableState * @interface IVariableState
* @brief Manages data for reset operations * @brief Manages data for reset operations
*/ */
class IVariableState : public details::no_copy { class IVariableState {
public: public:
IE_SUPPRESS_DEPRECATED_START IE_SUPPRESS_DEPRECATED_START
/** /**

View File

@ -93,6 +93,8 @@ Options:
-progress Optional. Show progress bar (can affect performance measurement). Default value is "false". -progress Optional. Show progress bar (can affect performance measurement). Default value is "false".
-shape Optional. Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size. -shape Optional. Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.
-layout Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size. -layout Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size.
-cache_dir "<path>" Optional. Enables caching of loaded models to specified directory.
-load_from_file Optional. Loads model from file directly without ReadNetwork.
CPU-specific performance options: CPU-specific performance options:
-nstreams "<integer>" Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices -nstreams "<integer>" Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices

View File

@ -122,6 +122,14 @@ static const char shape_message[] = "Optional. Set shape for input. For example,
static const char layout_message[] = "Optional. Prompts how network layouts should be treated by application. " static const char layout_message[] = "Optional. Prompts how network layouts should be treated by application. "
"For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size."; "For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.";
// @brief message for enabling caching
static const char cache_dir_message[] = "Optional. Enables caching of loaded models to specified directory. "
"List of devices which support caching is shown at the end of this message.";
// @brief message for single load network
static const char load_from_file_message[] = "Optional. Loads model from file directly without ReadNetwork. "
"All CNNNetwork options (like re-shape) will be ignored.";
// @brief message for quantization bits // @brief message for quantization bits
static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)"; static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)";
@ -238,6 +246,12 @@ DEFINE_string(op, "", outputs_precision_message);
/// Overwrites layout from ip and op options for specified layers."; /// Overwrites layout from ip and op options for specified layers.";
DEFINE_string(iop, "", iop_message); DEFINE_string(iop, "", iop_message);
/// @brief Define parameter for cache model dir <br>
DEFINE_string(cache_dir, "", cache_dir_message);
/// @brief Define flag for load network from model file by name without ReadNetwork <br>
DEFINE_bool(load_from_file, false, load_from_file_message);
/** /**
* @brief This function show a help message * @brief This function show a help message
*/ */
@ -262,6 +276,8 @@ static void showUsage() {
std::cout << " -progress " << progress_message << std::endl; std::cout << " -progress " << progress_message << std::endl;
std::cout << " -shape " << shape_message << std::endl; std::cout << " -shape " << shape_message << std::endl;
std::cout << " -layout " << layout_message << std::endl; std::cout << " -layout " << layout_message << std::endl;
std::cout << " -cache_dir \"<path>\" " << cache_dir_message << std::endl;
std::cout << " -load_from_file " << load_from_file_message << std::endl;
std::cout << std::endl << " device-specific performance options:" << std::endl; std::cout << std::endl << " device-specific performance options:" << std::endl;
std::cout << " -nstreams \"<integer>\" " << infer_num_streams_message << std::endl; std::cout << " -nstreams \"<integer>\" " << infer_num_streams_message << std::endl;
std::cout << " -nthreads \"<integer>\" " << infer_num_threads_message << std::endl; std::cout << " -nthreads \"<integer>\" " << infer_num_threads_message << std::endl;

View File

@ -330,7 +330,29 @@ int main(int argc, char* argv[]) {
std::string topology_name = ""; std::string topology_name = "";
benchmark_app::InputsInfo app_inputs_info; benchmark_app::InputsInfo app_inputs_info;
std::string output_name; std::string output_name;
if (!isNetworkCompiled) {
// Takes priority over config from file
if (!FLAGS_cache_dir.empty()) {
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), FLAGS_cache_dir}});
}
if (FLAGS_load_from_file && !isNetworkCompiled) {
next_step();
slog::info << "Skipping the step for loading network from file" << slog::endl;
next_step();
slog::info << "Skipping the step for loading network from file" << slog::endl;
next_step();
slog::info << "Skipping the step for loading network from file" << slog::endl;
auto startTime = Time::now();
exeNetwork = ie.LoadNetwork(FLAGS_m, device_name);
auto duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"load network time (ms)", duration_ms}});
if (batchSize == 0) {
batchSize = 1;
}
} else if (!isNetworkCompiled) {
// ----------------- 4. Reading the Intermediate Representation network // ----------------- 4. Reading the Intermediate Representation network
// ---------------------------------------- // ----------------------------------------
next_step(); next_step();
@ -363,7 +385,7 @@ int main(int argc, char* argv[]) {
slog::info << "Reshaping network: " << getShapesString(shapes) << slog::endl; slog::info << "Reshaping network: " << getShapesString(shapes) << slog::endl;
startTime = Time::now(); startTime = Time::now();
cnnNetwork.reshape(shapes); cnnNetwork.reshape(shapes);
auto duration_ms = double_to_string(get_total_ms_time(startTime)); duration_ms = double_to_string(get_total_ms_time(startTime));
slog::info << "Reshape network took " << duration_ms << " ms" << slog::endl; slog::info << "Reshape network took " << duration_ms << " ms" << slog::endl;
if (statistics) if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"reshape network time (ms)", duration_ms}}); statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"reshape network time (ms)", duration_ms}});
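The new -cache_dir and -load_from_file flags map onto existing public Core calls: the flag value becomes a CACHE_DIR config entry, and the path-based LoadNetwork overload replaces the explicit ReadNetwork/reshape steps. A minimal standalone sketch of the same flow, assuming placeholder paths model.xml and model_cache and the CPU device:

#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;
    // Equivalent of -cache_dir "<path>": compiled networks are cached under this directory.
    // "model_cache" is a placeholder directory name.
    ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "model_cache"}});
    // Equivalent of -load_from_file: hand the model path straight to LoadNetwork,
    // skipping ReadNetwork and any CNNNetwork-level options such as reshape.
    // "model.xml" is a placeholder model path.
    auto exeNetwork = ie.LoadNetwork("model.xml", "CPU");
    auto request = exeNetwork.CreateInferRequest();
    request.Infer();
    return 0;
}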

View File

@ -30,6 +30,8 @@ endif()
add_subdirectory(hetero_plugin) add_subdirectory(hetero_plugin)
add_subdirectory(auto_plugin)
add_subdirectory(multi_device) add_subdirectory(multi_device)
add_subdirectory(transformations) add_subdirectory(transformations)

View File

@ -0,0 +1,19 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
set (TARGET_NAME "AutoPlugin")
file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file(GLOB HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
ie_add_plugin(NAME ${TARGET_NAME}
DEVICE_NAME "AUTO"
SOURCES ${SOURCES} ${HEADERS}
VERSION_DEFINES_FOR auto_plugin.cpp)
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine)
ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME})
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})

View File

@ -0,0 +1,59 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include <vector>
#include <memory>
#include <map>
#include <unordered_map>
#include "ie_metric_helpers.hpp"
#include "auto_exec_network.hpp"
#include "auto_infer_request.hpp"
namespace AutoPlugin {
using namespace InferenceEngine;
AutoExecutableNetwork::AutoExecutableNetwork(const ExecutableNetwork& network,
const DeviceInformation& deviceInfo,
const bool needPerfCounters) :
_deviceInfo(deviceInfo),
_network(network),
_config(deviceInfo.config.begin(), deviceInfo.config.end()),
_needPerfCounters(needPerfCounters) {
}
AutoExecutableNetwork::~AutoExecutableNetwork() = default;
IInferRequestInternal::Ptr AutoExecutableNetwork::CreateInferRequestImpl(InputsDataMap networkInputs,
OutputsDataMap networkOutputs) {
auto inferRequest = _network.CreateInferRequest();
return std::make_shared<AutoInferRequest>(networkInputs, networkOutputs, inferRequest);
}
void AutoExecutableNetwork::Export(std::ostream& networkModel) {
_network.Export(networkModel);
}
RemoteContext::Ptr AutoExecutableNetwork::GetContext() const {
return _network.GetContext();
}
InferenceEngine::CNNNetwork AutoExecutableNetwork::GetExecGraphInfo() {
return _network.GetExecGraphInfo();
}
Parameter AutoExecutableNetwork::GetMetric(const std::string &name) const {
return _network.GetMetric(name);
}
void AutoExecutableNetwork::SetConfig(const std::map<std::string, Parameter>& config) {
_network.SetConfig(config);
}
Parameter AutoExecutableNetwork::GetConfig(const std::string& name) const {
return _network.GetConfig(name);
}
} // namespace AutoPlugin

View File

@ -0,0 +1,51 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <atomic>
#include <mutex>
#include <queue>
#include <unordered_map>
#include <map>
#include <vector>
#include <string>
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include <threading/ie_itask_executor.hpp>
namespace AutoPlugin {
using DeviceName = std::string;
struct DeviceInformation {
DeviceName deviceName;
std::map<std::string, std::string> config;
};
class AutoExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
public:
using Ptr = std::shared_ptr<AutoExecutableNetwork>;
AutoExecutableNetwork(const InferenceEngine::ExecutableNetwork& network,
const DeviceInformation& deviceInfo,
const bool needPerfCounters = false);
void Export(std::ostream& networkModel) override;
InferenceEngine::RemoteContext::Ptr GetContext() const override;
InferenceEngine::CNNNetwork GetExecGraphInfo() override;
InferenceEngine::Parameter GetMetric(const std::string &name) const override;
void SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) override;
InferenceEngine::Parameter GetConfig(const std::string& name) const override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) override;
~AutoExecutableNetwork() override;
DeviceInformation _deviceInfo;
InferenceEngine::ExecutableNetwork _network;
std::unordered_map<std::string, InferenceEngine::Parameter> _config;
bool _needPerfCounters = false;
};
} // namespace AutoPlugin

View File

@ -0,0 +1,39 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "auto_infer_request.hpp"
#include <ie_input_info.hpp>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
namespace AutoPlugin {
using namespace InferenceEngine;
AutoInferRequest::AutoInferRequest(const InputsDataMap& networkInputs,
const OutputsDataMap& networkOutputs,
const InferRequest& inferRequest)
: IInferRequestInternal(networkInputs, networkOutputs)
, _inferRequest(inferRequest) {
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> AutoInferRequest::GetPerformanceCounts() const {
return _inferRequest.GetPerformanceCounts();
}
void AutoInferRequest::InferImpl() {
_inferRequest.Infer();
}
void AutoInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) {
_inferRequest.SetBlob(name, data);
}
Blob::Ptr AutoInferRequest::GetBlob(const std::string& name) {
return _inferRequest.GetBlob(name);
}
void AutoInferRequest::Cancel() {
_inferRequest.Cancel();
}
} // namespace AutoPlugin

View File

@ -0,0 +1,40 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <atomic>
#include <cpp/ie_executable_network.hpp>
#include <cpp/ie_infer_request.hpp>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#include <ie_blob.h>
#include <ie_common.h>
#include <map>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
namespace AutoPlugin {
class AutoInferRequest : public InferenceEngine::IInferRequestInternal {
public:
using Ptr = std::shared_ptr<AutoInferRequest>;
explicit AutoInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
const InferenceEngine::InferRequest& inferRequest);
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
void InferImpl() override;
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) override;
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
void Cancel() override;
private:
InferenceEngine::InferRequest _inferRequest;
};
} // namespace AutoPlugin

View File

@ -0,0 +1,199 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include <vector>
#include <memory>
#include <map>
#include <unordered_set>
#include <ie_metric_helpers.hpp>
#include <ie_core.hpp>
#include <threading/ie_executor_manager.hpp>
#include <ie_algorithm.hpp>
#include "auto_plugin.hpp"
namespace AutoPlugin {
namespace {
ConfigType mergeConfigs(ConfigType config, const ConfigType& local) {
for (auto && kvp : local) {
config[kvp.first] = kvp.second;
}
return config;
}
DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices) {
for (auto& item : metaDevices) {
if (item.deviceName.find("CPU") == 0) {
return item;
}
}
IE_THROW(NotFound) << "No available device could be used";
}
} // namespace
AutoInferencePlugin::AutoInferencePlugin() {
_pluginName = "AUTO";
}
IE::ExecutableNetworkInternal::Ptr AutoInferencePlugin::LoadExeNetworkImpl(const IE::CNNNetwork& network,
const ConfigType& config) {
if (GetCore() == nullptr) {
IE_THROW() << "Please, work with AUTO device via InferencEngine::Core object";
}
if (network.getFunction() == nullptr) {
IE_THROW() << "AUTO device supports just ngraph network representation";
}
auto fullConfig = mergeConfigs(_config, config);
auto metaDevices = GetDeviceChoice(fullConfig);
// FIXME: always select CPU device now
DeviceInformation selectedDevice = SelectDevice(metaDevices);
IE::ExecutableNetwork executableNetwork;
try {
executableNetwork = GetCore()->LoadNetwork(network, selectedDevice.deviceName, selectedDevice.config);
} catch(const IE::Exception &iie) {
IE_THROW() << "Failed to load network to device named " << selectedDevice.deviceName
<< " with exception " << iie.what();
}
bool enablePerfCounters = false;
try {
enablePerfCounters =
executableNetwork.GetConfig(IE::PluginConfigParams::KEY_PERF_COUNT).as<std::string>() ==
IE::PluginConfigParams::YES;
} catch (...) {
}
return std::make_shared<AutoExecutableNetwork>(executableNetwork,
selectedDevice,
enablePerfCounters);
}
IE::QueryNetworkResult AutoInferencePlugin::QueryNetwork(const IE::CNNNetwork& network, const ConfigType& config) const {
IE::QueryNetworkResult queryResult = {};
if (GetCore() == nullptr) {
IE_THROW() << "Please, work with AUTO device via InferencEngine::Core object";
}
if (network.getFunction() == nullptr) {
IE_THROW() << "AUTO device supports just ngraph network representation";
}
auto fullConfig = mergeConfigs(_config, config);
auto metaDevices = GetDeviceChoice(fullConfig);
std::unordered_set<std::string> supportedLayers;
for (auto&& value : metaDevices) {
try {
auto deviceQr = GetCore()->QueryNetwork(network, value.deviceName, value.config);
std::unordered_set<std::string> deviceSupportedLayers;
for (auto &&layerQr : deviceQr.supportedLayersMap) {
deviceSupportedLayers.emplace(layerQr.first);
}
supportedLayers = supportedLayers.empty()
? deviceSupportedLayers : (deviceSupportedLayers.empty()
? supportedLayers : IE::details::Intersection(
supportedLayers, deviceSupportedLayers));
break;
} catch (...) {
}
}
for (auto&& supportedLayer : supportedLayers) {
queryResult.supportedLayersMap[supportedLayer] = GetName();
}
return queryResult;
}
IE::Parameter AutoInferencePlugin::GetConfig(const std::string& name,
const std::map<std::string, IE::Parameter> & options) const {
auto it = _config.find(name);
if (it == _config.end()) {
IE_THROW() << "Unsupported config key: " << name;
} else {
return { it->second };
}
}
void AutoInferencePlugin::SetConfig(const ConfigType& config) {
for (auto && kvp : config) {
_config[kvp.first] = kvp.second;
}
}
IE::Parameter AutoInferencePlugin::GetMetric(const std::string& name,
const std::map<std::string, IE::Parameter> & options) const {
if (name == METRIC_KEY(SUPPORTED_METRICS)) {
std::vector<std::string> metrics;
metrics.emplace_back(METRIC_KEY(SUPPORTED_METRICS));
metrics.emplace_back(METRIC_KEY(FULL_DEVICE_NAME));
metrics.emplace_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
metrics.emplace_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
} else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
std::string device_name = {"Inference Engine AUTO device"};
IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, device_name);
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
std::vector<std::string> configKeys;
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
} else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
std::vector<std::string> capabilities = { "" };
IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
} else {
IE_THROW() << "Unsupported metric key " << name;
}
}
std::vector<AutoPlugin::DeviceInformation> AutoInferencePlugin::GetDeviceChoice(const ConfigType& config) const {
std::vector<DeviceInformation> metaDevices;
std::vector<std::string> availableDevices = GetCore()->GetAvailableDevices();
auto getDeviceConfig = [&] (const DeviceName & deviceWithID) {
IE::DeviceIDParser deviceParser(deviceWithID);
std::string deviceName = deviceParser.getDeviceName();
ConfigType tconfig = mergeConfigs(_config, config);
// set device ID if any
std::string deviceIDLocal = deviceParser.getDeviceID();
if (!deviceIDLocal.empty()) {
tconfig[IE::PluginConfigParams::KEY_DEVICE_ID] = deviceIDLocal;
}
return GetSupportedConfig(tconfig, deviceName);
};
for (auto && d : availableDevices) {
if (d != _pluginName) {
metaDevices.push_back({ d, getDeviceConfig(d)});
}
}
if (metaDevices.empty()) {
IE_THROW() << "Please, check environment due to no supported devices can be used";
}
return metaDevices;
}
//////////////////////////////////// private & protected functions ///////////////////
ConfigType AutoInferencePlugin::GetSupportedConfig(const ConfigType& config,
const std::string& deviceName) const {
std::vector<std::string> supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
ConfigType supportedConfig;
for (auto&& key : supportedConfigKeys) {
auto itKey = config.find(key);
if (config.end() != itKey) {
supportedConfig[key] = itKey->second;
}
}
return supportedConfig;
}
// define CreatePluginEngine to create plugin instance
static const IE::Version version = {{2, 1}, CI_BUILD_NUMBER, "AutoPlugin"};
IE_DEFINE_PLUGIN_CREATE_FUNCTION(AutoInferencePlugin, version)
} // namespace AutoPlugin
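Taken together these files implement the new AUTO device: the plugin enumerates the available devices via GetDeviceChoice(), picks one (currently always a CPU device, as SelectDevice() above shows), and forwards the executable network and infer requests to it. A minimal usage sketch, assuming the plugin is registered in the build's plugins.xml and using a placeholder model.xml:

#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;
    // "model.xml" is a placeholder path; any IR accepted by ReadNetwork works here.
    auto network = ie.ReadNetwork("model.xml");
    // "AUTO" resolves to the new plugin: it collects the available devices and
    // loads the network on the one SelectDevice() picks (currently always CPU).
    auto exeNetwork = ie.LoadNetwork(network, "AUTO");
    auto request = exeNetwork.CreateInferRequest();
    request.Infer();
    return 0;
}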

View File

@ -0,0 +1,37 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <map>
#include <vector>
#include <string>
#include <unordered_set>
#include <cpp_interfaces/impl/ie_plugin_internal.hpp>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include "auto_exec_network.hpp"
namespace AutoPlugin {
namespace IE = InferenceEngine;
using ConfigType = std::map<std::string, std::string>;
class AutoInferencePlugin : public IE::InferencePluginInternal {
public:
AutoInferencePlugin();
~AutoInferencePlugin() = default;
IE::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const IE::CNNNetwork& network, const ConfigType& config) override;
IE::QueryNetworkResult QueryNetwork(const IE::CNNNetwork& network, const ConfigType& config) const override;
IE::Parameter GetMetric(const std::string& name, const std::map<std::string, IE::Parameter>& options) const override;
IE::Parameter GetConfig(const std::string& name, const std::map<std::string, IE::Parameter> & options) const override;
void SetConfig(const ConfigType& config) override;
private:
std::vector<AutoPlugin::DeviceInformation> GetDeviceChoice(const ConfigType& config) const;
protected:
ConfigType GetSupportedConfig(const ConfigType& config, const AutoPlugin::DeviceName & deviceName) const;
};
} // namespace AutoPlugin

View File

@ -40,8 +40,6 @@ target_include_directories(${TARGET_NAME} PRIVATE
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
set_ie_threading_interface_for(clDNN_lib)
# Failed because of OpenCL # Failed because of OpenCL
# ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) # ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME})

View File

@ -70,6 +70,7 @@
#include <low_precision/pull_reshape_through_dequantization.hpp> #include <low_precision/pull_reshape_through_dequantization.hpp>
#include <low_precision/pull_transpose_through_dequantization.hpp> #include <low_precision/pull_transpose_through_dequantization.hpp>
#include <low_precision/transformer.hpp> #include <low_precision/transformer.hpp>
#include <low_precision/convolution_backprop_data.hpp>
#include <low_precision/mat_mul.hpp> #include <low_precision/mat_mul.hpp>
#include <low_precision/strided_slice.hpp> #include <low_precision/strided_slice.hpp>
#include <low_precision/network_helper.hpp> #include <low_precision/network_helper.hpp>
@ -175,7 +176,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>(); manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
manager.register_pass<ngraph::pass::ConvertGather0D>(); manager.register_pass<ngraph::pass::ConvertGather0D>();
std::vector<std::pair<ngraph::element::Type, ngraph::element::Type>> convert_precision_list { static const precisions_array convert_precision_list {
{ngraph::element::i64, ngraph::element::i32}, {ngraph::element::i64, ngraph::element::i32},
{ngraph::element::u64, ngraph::element::i32}, {ngraph::element::u64, ngraph::element::i32},
{ngraph::element::u16, ngraph::element::i32}, {ngraph::element::u16, ngraph::element::i32},
@ -185,9 +186,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
{ngraph::element::u4, ngraph::element::u8}, {ngraph::element::u4, ngraph::element::u8},
}; };
for (auto& precision : convert_precision_list) { manager.register_pass<ngraph::pass::ConvertPrecision>(convert_precision_list);
manager.register_pass<ngraph::pass::ConvertPrecision>(precision.first, precision.second);
}
auto pass_config = manager.get_pass_config(); auto pass_config = manager.get_pass_config();
@ -366,7 +365,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
// Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers // Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers
// With this key users can work-around such issues // With this key users can work-around such issues
if (!config.enable_fp16_for_quantized_models) { if (!config.enable_fp16_for_quantized_models) {
manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32); manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
} }
auto lptPrerequisites = manager.register_pass<ngraph::pass::GraphRewrite>(); auto lptPrerequisites = manager.register_pass<ngraph::pass::GraphRewrite>();
const std::vector<ngraph::element::Type> supportedTypes = { ngraph::element::i8, ngraph::element::u8 }; const std::vector<ngraph::element::Type> supportedTypes = { ngraph::element::i8, ngraph::element::u8 };
@ -383,6 +382,9 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params) .add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params)
.setSupportAsymmetricQuantization(false) .setSupportAsymmetricQuantization(false)
.setSupport3DTensorOnActivations(false)) .setSupport3DTensorOnActivations(false))
.add<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>(LayerTransformation::Params(params)
.setSupportAsymmetricQuantization(false)
.setDeconvolutionSpecificChannelsRatio(true))
// INT8 StridedSlice not supported // INT8 StridedSlice not supported
.remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>()); .remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>());

View File

@ -90,7 +90,8 @@ public:
std::map<std::string, InferenceEngine::SizeVector> outputDims; std::map<std::string, InferenceEngine::SizeVector> outputDims;
std::map<std::string, cldnn::layout> inputLayouts; std::map<std::string, cldnn::layout> inputLayouts;
std::map<const char *, cldnn::primitive_id> blobMemCache; using BlobCacheKey = std::pair<const char*, std::vector<size_t>>;
std::map<BlobCacheKey, cldnn::primitive_id> blobMemCache;
int m_max_batch; int m_max_batch;
int m_curBatch; int m_curBatch;

View File

@ -163,7 +163,8 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
cldnn::primitive_id constPrimID; cldnn::primitive_id constPrimID;
auto data = op->get_data_ptr<char>(); auto data = op->get_data_ptr<char>();
auto bufIter = p.blobMemCache.find(data);
auto bufIter = p.blobMemCache.find(std::make_pair(data, constDims));
if (bufIter != p.blobMemCache.end()) { if (bufIter != p.blobMemCache.end()) {
constPrimID = bufIter->second; constPrimID = bufIter->second;
@ -198,7 +199,7 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
std::memcpy(&buf[0], &data[0], bufSize); std::memcpy(&buf[0], &data[0], bufSize);
} }
p.AddPrimitive(cldnn::data(initialconstPrimID, mem)); p.AddPrimitive(cldnn::data(initialconstPrimID, mem));
p.blobMemCache[data] = initialconstPrimID; p.blobMemCache[std::make_pair(data, constDims)] = initialconstPrimID;
constPrimID = initialconstPrimID; constPrimID = initialconstPrimID;
} }

View File

@ -60,8 +60,7 @@ void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& o
auto shape_a = op->get_input_shape(0); auto shape_a = op->get_input_shape(0);
auto shape_b = op->get_input_shape(1); auto shape_b = op->get_input_shape(1);
bool is_fc = ngraph::is_type<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1)) || bool is_fc = IsNodeOnConstPath(op->get_input_node_shared_ptr(1));
ngraph::is_type<ngraph::op::v0::FakeQuantize>(op->get_input_node_shared_ptr(1));
is_fc &= std::count_if(shape_b.begin(), shape_b.end(), [](size_t x) { return x != 1; }) <= 2; is_fc &= std::count_if(shape_b.begin(), shape_b.end(), [](size_t x) { return x != 1; }) <= 2;
if (is_fc) { if (is_fc) {

View File

@ -154,7 +154,7 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
auto data = static_cast<const char *>(meanBlobPtr->buffer()); auto data = static_cast<const char *>(meanBlobPtr->buffer());
auto bufIter = p.blobMemCache.find(data); auto bufIter = p.blobMemCache.find(std::make_pair(data, meanDims));
if (bufIter != p.blobMemCache.end()) { if (bufIter != p.blobMemCache.end()) {
meanBlobID = bufIter->second; meanBlobID = bufIter->second;
} else { } else {
@ -166,7 +166,7 @@ void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Paramet
std::memcpy(&buf[0], &data[0], bufSize); std::memcpy(&buf[0], &data[0], bufSize);
p.AddPrimitive(cldnn::data(meanBlobID, mem)); p.AddPrimitive(cldnn::data(meanBlobID, mem));
p.blobMemCache[data] = meanBlobID; p.blobMemCache[std::make_pair(data, meanDims)] = meanBlobID;
} }
break; break;
} }

View File

@ -14,6 +14,7 @@
#include <legacy/ie_layers.h> #include <legacy/ie_layers.h>
#include "gna_upstream_iterator.hpp" #include "gna_upstream_iterator.hpp"
#include "layers/gna_layer_info.hpp" #include "layers/gna_layer_info.hpp"
#include "layers/gna_convolution_layer.hpp"
#include "gna_plugin_log.hpp" #include "gna_plugin_log.hpp"
#include "gna_slope_scale.h" #include "gna_slope_scale.h"
#include "runtime/pwl.h" #include "runtime/pwl.h"
@ -834,6 +835,7 @@ class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*> {
THROW_GNA_EXCEPTION << "Two Input layers " << (*sourceLayerIt)->name THROW_GNA_EXCEPTION << "Two Input layers " << (*sourceLayerIt)->name
<< " and " << (*nextInputIt)->name << " have different scales in concat!!! \n"; << " and " << (*nextInputIt)->name << " have different scales in concat!!! \n";
} }
++nextInputIt;
} }
} }
@ -1107,8 +1109,9 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
double weights_reducer = 1.0; double weights_reducer = 1.0;
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl); auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
if (conv) { if (conv) {
auto channels_num = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C); const auto inDepth = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
weights_reducer = MAX_VAL_2B_FEAT * scaleRange * channels_num / std::numeric_limits<int32_t>::max(); weights_reducer = GNAConvolutionLayer::getWeightsReducer(*conv);
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
weights_reducer = std::max(1.0, weights_reducer); weights_reducer = std::max(1.0, weights_reducer);
} }
quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer); quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer);

View File

@ -30,6 +30,7 @@
#include "frontend/model_quantizer.hpp" #include "frontend/model_quantizer.hpp"
#include "layers/layers_builder.hpp" #include "layers/layers_builder.hpp"
#include "layers/gna_concat_layer.hpp" #include "layers/gna_concat_layer.hpp"
#include "layers/gna_convolution_layer.hpp"
#include "layers/gna_crop_layer.hpp" #include "layers/gna_crop_layer.hpp"
#include "layers/gna_fake_quantize_layer.hpp" #include "layers/gna_fake_quantize_layer.hpp"
#include "round_float_define.hpp" #include "round_float_define.hpp"
@ -265,7 +266,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
} }
// Map 2d convolution to 1d if it's possible // Map 2d convolution to 1d if it's possible
if (in_height > 1 && in_width > 1 && in_width == convolution._kernel_x && convolution._stride_x == 1) { if (GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, convolution._kernel_x, convolution._stride_x)) {
in_width *= in_height; in_width *= in_height;
in_height = 1; in_height = 1;
out_width *= out_height; out_width *= out_height;
@ -298,9 +299,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
dnn->new_num_conv_columns = 0; dnn->new_num_conv_columns = 0;
} }
// TODO: refine following condition if (GNAConvolutionLayer::isConv2D(in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x) ||
if (((in_channels > 1) && (in_height > 1) && (in_width > 1)) || // 3D input
(convolution._kernel_x != 1 && convolution._kernel_y != 1) || // 2D kernel
in_height != 1) { in_height != 1) {
// TensorFlow default layout is NHWC // TensorFlow default layout is NHWC
// OpenVino Default layout is NCHW // OpenVino Default layout is NCHW

View File

@ -110,6 +110,8 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
InferenceEngine::IExecutableNetworkInternal::Ptr LoadNetwork(const InferenceEngine::CNNNetwork &network, InferenceEngine::IExecutableNetworkInternal::Ptr LoadNetwork(const InferenceEngine::CNNNetwork &network,
const std::map<std::string, std::string> &config_map, const std::map<std::string, std::string> &config_map,
InferenceEngine::RemoteContext::Ptr context) override { THROW_GNA_EXCEPTION << "Not implemented"; } InferenceEngine::RemoteContext::Ptr context) override { THROW_GNA_EXCEPTION << "Not implemented"; }
InferenceEngine::ExecutableNetwork LoadNetwork(const std::string &modelPath,
const std::map<std::string, std::string> &config_map) override { THROW_GNA_EXCEPTION << "Not implemented"; }
bool Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &result); bool Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &result);
void SetCore(InferenceEngine::ICore*) noexcept override {} void SetCore(InferenceEngine::ICore*) noexcept override {}
InferenceEngine::ICore* GetCore() const noexcept override {return nullptr;} InferenceEngine::ICore* GetCore() const noexcept override {return nullptr;}

View File

@ -0,0 +1,49 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>
#include <legacy/ie_layers.h>
#include "../gna_graph_tools.hpp"
namespace GNAPluginNS {
struct GNAConvolutionLayer {
static bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
return inHeight > 1 && inWidth > 1 && inWidth == kernelWidth && strideWidth == 1;
}
// 3D input or 2D kernel
static bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
const uint32_t kernelHeight, const uint32_t kernelWidth) {
return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1);
}
static double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
using KRT = std::pair<uint32_t, double>;
// Empirically determined weights reducers for 2D Convolution
// i.e.:
// for kernelSize >= 9 -> 1.3
// for kernelSize in {7, 8} -> 1.2
const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
auto reducer = 1.0;
const auto inDepth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
const auto inHeight = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
const auto inWidth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
if (isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
!isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) {
const auto kernelSize = conv._kernel_x * conv._kernel_y;
auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
[](const KRT& l, const KRT::first_type& r) {return l.first > r; });
if (r != reducers.end())
reducer = r->second;
}
return reducer;
}
};
} // namespace GNAPluginNS
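The reducers table above is kept sorted by kernel size in descending order, so the std::lower_bound call with a ">" comparator returns the first entry whose threshold does not exceed the kernel size. A self-contained restatement of just that lookup, handy for sanity-checking the thresholds (lookupReducer is an illustrative helper, not part of the plugin):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Illustrative helper: same lookup as getWeightsReducer(), minus the layer plumbing.
double lookupReducer(uint32_t kernelSize) {
    using KRT = std::pair<uint32_t, double>;
    const std::vector<KRT> reducers{ {9, 1.3}, {7, 1.2} };  // sorted by kernel size, descending
    // lower_bound with a ">" comparator returns the first entry whose threshold <= kernelSize.
    auto it = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
                               [](const KRT& l, const uint32_t& r) { return l.first > r; });
    return it != reducers.end() ? it->second : 1.0;
}

int main() {
    for (uint32_t k : {3u, 7u, 8u, 9u, 12u})
        std::cout << "kernelSize=" << k << " -> reducer=" << lookupReducer(k) << std::endl;
    // Prints 1, 1.2, 1.2, 1.3, 1.3 respectively.
    return 0;
}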

View File

@ -8,11 +8,6 @@ if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
find_package(TBBBIND_2_4 QUIET) find_package(TBBBIND_2_4 QUIET)
if (TBBBIND_2_4_FOUND) if (TBBBIND_2_4_FOUND)
message(STATUS "Static tbbbind_2_4 package was found") message(STATUS "Static tbbbind_2_4 package was found")
# WA: need to update TBBBind_2_4 package
set_target_properties(TBBbind::tbbbind_2_4 PROPERTIES
MAP_IMPORTED_CONFIG_MINSIZEREL Release
MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release)
endif() endif()
endif() endif()
@ -32,6 +27,9 @@ set(LEGACY_LIBRARY_SHARED_SRCS
"${LEGACY_SRC_ROOT}/ngraph_ops/nms_ie.cpp" "${LEGACY_SRC_ROOT}/ngraph_ops/nms_ie.cpp"
"${LEGACY_SRC_ROOT}/ngraph_ops/onehot_ie.cpp") "${LEGACY_SRC_ROOT}/ngraph_ops/onehot_ie.cpp")
set_source_files_properties(${LEGACY_LIBRARY_SHARED_SRCS} PROPERTIES
COMPILE_DEFINITIONS "USE_STATIC_IE")
set(IE_STATIC_DEPENDENT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cpp) set(IE_STATIC_DEPENDENT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cpp)
list(REMOVE_ITEM LIBRARY_SRC ${IE_STATIC_DEPENDENT_FILES}) list(REMOVE_ITEM LIBRARY_SRC ${IE_STATIC_DEPENDENT_FILES})
@ -203,7 +201,6 @@ if(WIN32)
endif() endif()
target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ${NGRAPH_LIBRARIES} target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ${NGRAPH_LIBRARIES}
inference_engine_snippets
inference_engine_transformations pugixml) inference_engine_transformations pugixml)
target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE) target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE)

View File

@ -94,6 +94,28 @@ void CNNNetworkNGraphImpl::createDataForResult(const ::ngraph::Output<::ngraph::
} }
} }
void CNNNetworkNGraphImpl::validateFunctionNames() const {
// nGraph function parameters and pre-Results operations should have unique names
std::unordered_set<std::string> unique_names;
for (const auto& param : _ngraph_function->get_parameters()) {
if (unique_names.count(param->get_friendly_name())) {
IE_THROW() << "Function contains several inputs with one friendly name!";
}
unique_names.insert(param->get_friendly_name());
}
for (const auto& result : _ngraph_function->get_results()) {
const auto& parent = result->get_input_node_shared_ptr(0);
auto name = parent->get_friendly_name();
if (parent->get_output_size() > 1) {
name += "." + std::to_string(result->get_input_source_output(0).get_index());
}
if (unique_names.count(name) && !ngraph::op::is_parameter(parent)) {
IE_THROW() << "Function contains several inputs and outputs with one friendly name!";
}
unique_names.insert(name);
}
}
CNNNetworkNGraphImpl::CNNNetworkNGraphImpl( CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(
const std::shared_ptr<Function>& nGraph, const std::shared_ptr<Function>& nGraph,
const std::vector<IExtensionPtr>& exts) const std::vector<IExtensionPtr>& exts)
@ -113,6 +135,8 @@ CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(
network.setInputInfo(info); network.setInputInfo(info);
}; };
validateFunctionNames();
reshape(); reshape();
for (const auto& layer : _ngraph_function->get_parameters()) { for (const auto& layer : _ngraph_function->get_parameters()) {
std::string outName = layer->get_friendly_name(); std::string outName = layer->get_friendly_name();
@ -148,6 +172,7 @@ CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(const CNNNetwork& network) {
} }
_ngraph_function = copyFunction(network.getFunction(), false); _ngraph_function = copyFunction(network.getFunction(), false);
validateFunctionNames();
InputsDataMap inputs = network.getInputsInfo(); InputsDataMap inputs = network.getInputsInfo();
OutputsDataMap outputs = network.getOutputsInfo(); OutputsDataMap outputs = network.getOutputsInfo();
@ -231,6 +256,13 @@ StatusCode CNNNetworkNGraphImpl::addOutput(const std::string& layerName, size_t
auto result = make_shared<::ngraph::op::Result>(layer->output(outputIndex)); auto result = make_shared<::ngraph::op::Result>(layer->output(outputIndex));
result->set_friendly_name(outputName); result->set_friendly_name(outputName);
_ngraph_function->add_results({result}); _ngraph_function->add_results({result});
// Check that we cannot add Result to layer with non unique friendly name
try {
validateFunctionNames();
} catch (...) {
_ngraph_function->remove_result(result);
throw;
}
if (_outputData.count(outputName) == 0) { if (_outputData.count(outputName) == 0) {
reshape(); reshape();
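The new validateFunctionNames() check rejects nGraph functions in which two parameters, or two pre-Result nodes, share a friendly name, both at CNNNetwork construction and when addOutput would create such a clash. A small sketch of the rejected case, assuming the 2021-era public nGraph/Inference Engine headers (shapes and names are arbitrary):

#include <memory>
#include <iostream>
#include <inference_engine.hpp>
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

int main() {
    using namespace ngraph;
    auto a = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
    auto b = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3});
    a->set_friendly_name("input");
    b->set_friendly_name("input");  // deliberately duplicated friendly name
    auto add = std::make_shared<opset1::Add>(a, b);
    auto fn = std::make_shared<Function>(NodeVector{add}, ParameterVector{a, b});
    try {
        InferenceEngine::CNNNetwork net(fn);  // validateFunctionNames() rejects the duplicate
    } catch (const std::exception& ex) {
        std::cout << ex.what() << std::endl;
    }
    return 0;
}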

View File

@ -105,6 +105,7 @@ private:
*/ */
void reshape(); void reshape();
void reshape(const std::map<std::string, std::vector<size_t>>& inputShapes); void reshape(const std::map<std::string, std::vector<size_t>>& inputShapes);
void validateFunctionNames() const;
}; };
} // namespace details } // namespace details
} // namespace InferenceEngine } // namespace InferenceEngine

View File

@ -74,11 +74,11 @@ InferRequest::Ptr ExecutableNetwork::CreateInferRequestPtr() {
} }
void ExecutableNetwork::Export(const std::string& modelFileName) { void ExecutableNetwork::Export(const std::string& modelFileName) {
EXEC_NET_CALL_STATEMENT(return _impl->Export(modelFileName)); EXEC_NET_CALL_STATEMENT(_impl->Export(modelFileName));
} }
void ExecutableNetwork::Export(std::ostream& networkModel) { void ExecutableNetwork::Export(std::ostream& networkModel) {
EXEC_NET_CALL_STATEMENT(return _impl->Export(networkModel)); EXEC_NET_CALL_STATEMENT(_impl->Export(networkModel));
} }
CNNNetwork ExecutableNetwork::GetExecGraphInfo() { CNNNetwork ExecutableNetwork::GetExecGraphInfo() {

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
#include "details/ie_so_loader.h"
#include "cpp/ie_memory_state.hpp" #include "cpp/ie_memory_state.hpp"
#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp" #include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp"
#include "exception2status.hpp" #include "exception2status.hpp"
@ -19,7 +20,7 @@
namespace InferenceEngine { namespace InferenceEngine {
VariableState::VariableState(const std::shared_ptr<IVariableStateInternal>& impl, VariableState::VariableState(const std::shared_ptr<IVariableStateInternal>& impl,
const details::SharedObjectLoader::Ptr& so) : _impl(impl), _so(so) { const std::shared_ptr<details::SharedObjectLoader>& so) : _impl(impl), _so(so) {
if (impl == nullptr) { if (impl == nullptr) {
IE_THROW(NotAllocated) << "VariableState wrapper was not initialized."; IE_THROW(NotAllocated) << "VariableState wrapper was not initialized.";
} }

View File

@ -51,6 +51,9 @@ Parsed<T> parseDeviceNameIntoConfig(const std::string& deviceName, const std::ma
deviceName_ = "MULTI"; deviceName_ = "MULTI";
config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6); config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6);
} else { } else {
if (deviceName_.empty()) {
deviceName_ = "AUTO";
}
DeviceIDParser parser(deviceName_); DeviceIDParser parser(deviceName_);
deviceName_ = parser.getDeviceName(); deviceName_ = parser.getDeviceName();
std::string deviceIDLocal = parser.getDeviceID(); std::string deviceIDLocal = parser.getDeviceID();
@ -493,9 +496,8 @@ public:
return res; return res;
} }
// TODO: In future this method can be added to ICore interface
ExecutableNetwork LoadNetwork(const std::string& modelPath, const std::string& deviceName, ExecutableNetwork LoadNetwork(const std::string& modelPath, const std::string& deviceName,
const std::map<std::string, std::string>& config) { const std::map<std::string, std::string>& config) override {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Path"); OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "Core::LoadNetwork::Path");
auto parsed = parseDeviceNameIntoConfig(deviceName, config); auto parsed = parseDeviceNameIntoConfig(deviceName, config);
auto plugin = GetCPPPluginByName(parsed._deviceName); auto plugin = GetCPPPluginByName(parsed._deviceName);
@ -511,6 +513,8 @@ public:
auto cnnNetwork = ReadNetwork(modelPath, std::string()); auto cnnNetwork = ReadNetwork(modelPath, std::string());
res = LoadNetworkImpl(cnnNetwork, plugin, parsed._config, nullptr, hash, modelPath); res = LoadNetworkImpl(cnnNetwork, plugin, parsed._config, nullptr, hash, modelPath);
} }
} else if (cacheManager) {
res = plugin.LoadNetwork(modelPath, parsed._config);
} else { } else {
auto cnnNetwork = ReadNetwork(modelPath, std::string()); auto cnnNetwork = ReadNetwork(modelPath, std::string());
res = LoadNetworkImpl(cnnNetwork, plugin, parsed._config, nullptr, {}, modelPath); res = LoadNetworkImpl(cnnNetwork, plugin, parsed._config, nullptr, {}, modelPath);
@ -582,6 +586,15 @@ public:
} }
} }
// AUTO case
{
if (deviceName.find("AUTO:") == 0) {
IE_THROW()
<< "You can get specific metrics with the GetMetric only for the AUTO itself (without devices). "
"To get individual devices's metrics call GetMetric for each device separately";
}
}
auto parsed = parseDeviceNameIntoConfig(deviceName); auto parsed = parseDeviceNameIntoConfig(deviceName);
// we need to return a copy of Parameter object which is created on Core side, // we need to return a copy of Parameter object which is created on Core side,
@ -752,7 +765,7 @@ public:
* @brief Sets config values for a plugin or set of plugins * @brief Sets config values for a plugin or set of plugins
* @param deviceName A device name to set config to * @param deviceName A device name to set config to
* If empty, config is set for all the plugins / plugin's meta-data * If empty, config is set for all the plugins / plugin's meta-data
* @note `deviceName` is not allowed in form of MULTI:CPU, HETERO:FPGA,CPU * @note `deviceName` is not allowed in form of MULTI:CPU, HETERO:FPGA,CPU, AUTO:CPU
* just simple forms like CPU, GPU, MULTI, GPU.0, etc * just simple forms like CPU, GPU, MULTI, GPU.0, etc
*/ */
void SetConfigForPlugins(const std::map<std::string, std::string>& configMap, const std::string& deviceName) { void SetConfigForPlugins(const std::map<std::string, std::string>& configMap, const std::string& deviceName) {
@ -908,6 +921,10 @@ RemoteContext::Ptr Core::CreateContext(const std::string& deviceName, const Para
if (deviceName.find("MULTI") == 0) { if (deviceName.find("MULTI") == 0) {
IE_THROW() << "MULTI device does not support remote context"; IE_THROW() << "MULTI device does not support remote context";
} }
if (deviceName.find("AUTO") == 0) {
IE_THROW() << "AUTO device does not support remote context";
}
auto parsed = parseDeviceNameIntoConfig(deviceName, params); auto parsed = parseDeviceNameIntoConfig(deviceName, params);
return _impl->GetCPPPluginByName(parsed._deviceName).CreateContext(parsed._config); return _impl->GetCPPPluginByName(parsed._deviceName).CreateContext(parsed._config);
@ -920,6 +937,9 @@ RemoteContext::Ptr Core::GetDefaultContext(const std::string& deviceName) {
if (deviceName.find("MULTI") == 0) { if (deviceName.find("MULTI") == 0) {
IE_THROW() << "MULTI device does not support remote context"; IE_THROW() << "MULTI device does not support remote context";
} }
if (deviceName.find("AUTO") == 0) {
IE_THROW() << "AUTO device does not support remote context";
}
auto parsed = parseDeviceNameIntoConfig(deviceName, ParamMap()); auto parsed = parseDeviceNameIntoConfig(deviceName, ParamMap());
return _impl->GetCPPPluginByName(parsed._deviceName).GetDefaultContext(parsed._config); return _impl->GetCPPPluginByName(parsed._deviceName).GetDefaultContext(parsed._config);
@ -934,6 +954,10 @@ void Core::AddExtension(IExtensionPtr extension, const std::string& deviceName_)
IE_THROW() IE_THROW()
<< "MULTI device does not support extensions. Please, set extensions directly to fallback devices"; << "MULTI device does not support extensions. Please, set extensions directly to fallback devices";
} }
if (deviceName_.find("AUTO") == 0) {
IE_THROW()
<< "AUTO device does not support extensions. Please, set extensions directly to fallback devices";
}
_impl->AddExtension(extension); _impl->AddExtension(extension);
} }
@ -953,6 +977,9 @@ ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName, const st
if (deviceName.find("MULTI") == 0) { if (deviceName.find("MULTI") == 0) {
IE_THROW() << "MULTI device does not support ImportNetwork"; IE_THROW() << "MULTI device does not support ImportNetwork";
} }
if (deviceName.find("AUTO") == 0) {
IE_THROW() << "AUTO device does not support ImportNetwork";
}
auto parsed = parseDeviceNameIntoConfig(deviceName, config); auto parsed = parseDeviceNameIntoConfig(deviceName, config);
return _impl->GetCPPPluginByName(parsed._deviceName).ImportNetwork(modelFileName, parsed._config); return _impl->GetCPPPluginByName(parsed._deviceName).ImportNetwork(modelFileName, parsed._config);
@ -998,6 +1025,12 @@ void Core::SetConfig(const std::map<std::string, std::string>& config, const std
"You can configure the devices with SetConfig before creating the MULTI on top."; "You can configure the devices with SetConfig before creating the MULTI on top.";
} }
// AUTO case
if (deviceName.find("AUTO:") == 0) {
IE_THROW() << "SetConfig is supported only for AUTO itself (without devices). "
"You can configure the devices with SetConfig before creating the AUTO on top.";
}
// GPU.0, FPGA.1 cases // GPU.0, FPGA.1 cases
if (deviceName.find(".") != std::string::npos) { if (deviceName.find(".") != std::string::npos) {
IE_THROW() << "SetConfig is supported only for device family itself (without particular device .#). " IE_THROW() << "SetConfig is supported only for device family itself (without particular device .#). "
@ -1029,6 +1062,14 @@ Parameter Core::GetConfig(const std::string& deviceName, const std::string& name
"GetConfig is also possible for the individual devices before creating the MULTI on top."; "GetConfig is also possible for the individual devices before creating the MULTI on top.";
} }
} }
// AUTO case
{
if (deviceName.find("AUTO:") == 0) {
IE_THROW()
<< "You can only GetConfig of the AUTO itself (without devices). "
"GetConfig is also possible for the individual devices before creating the AUTO on top.";
}
}
auto parsed = parseDeviceNameIntoConfig(deviceName); auto parsed = parseDeviceNameIntoConfig(deviceName);
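Taken together, the Core changes above make AUTO behave like MULTI at the API boundary: the "AUTO:<device>" form is rejected for GetMetric, GetConfig and SetConfig, and remote contexts, extensions and ImportNetwork are rejected for AUTO altogether. A minimal sketch of the resulting behaviour from the application side, assuming the public InferenceEngine::Core class, the METRIC_KEY macro and the 2021.x InferenceEngine::Exception type (illustrative only, not part of this patch):

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

int main() {
    InferenceEngine::Core ie;

    // Allowed: plain "AUTO" without a device list.
    auto supported = ie.GetMetric("AUTO", METRIC_KEY(SUPPORTED_METRICS));
    (void)supported;

    // Rejected by the checks added above: the "AUTO:<device>" form throws,
    // as do CreateContext / ImportNetwork / AddExtension for AUTO.
    try {
        ie.GetMetric("AUTO:CPU", METRIC_KEY(SUPPORTED_METRICS));
    } catch (const InferenceEngine::Exception&) {
        // "You can get specific metrics with the GetMetric only for the AUTO itself ..."
    }
    return 0;
}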

View File

@ -88,6 +88,10 @@ public:
PLUGIN_CALL_STATEMENT(return ExecutableNetwork(actual->LoadNetwork(network, config, context), actual)); PLUGIN_CALL_STATEMENT(return ExecutableNetwork(actual->LoadNetwork(network, config, context), actual));
} }
ExecutableNetwork LoadNetwork(const std::string& modelPath, const std::map<std::string, std::string>& config) {
PLUGIN_CALL_STATEMENT(return actual->LoadNetwork(modelPath, config));
}
QueryNetworkResult QueryNetwork(const CNNNetwork& network, QueryNetworkResult QueryNetwork(const CNNNetwork& network,
const std::map<std::string, std::string>& config) const { const std::map<std::string, std::string>& config) const {
QueryNetworkResult res; QueryNetworkResult res;
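The LoadNetwork(modelPath, config) wrapper added above forwards a model path straight to the plugin; the cacheManager branch added earlier in Core::LoadNetwork uses it to skip the explicit ReadNetwork step. A hedged usage sketch from the application side, assuming the public path-based Core::LoadNetwork overload and a hypothetical model file name:

#include <ie_core.hpp>

int main() {
    InferenceEngine::Core ie;
    // Path-based load: depending on the branches above, the network is served
    // from the cache, handed to the plugin as a path, or read via ReadNetwork.
    auto execNet = ie.LoadNetwork("model.xml", "CPU", {});
    (void)execNet;
    return 0;
}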

View File

@ -41,7 +41,7 @@ inline float asfloat(uint32_t v) {
return f; return f;
} }
// Function to convert F32 into F16 // Function to convert F16 into F32
float f16tof32(ie_fp16 x) { float f16tof32(ie_fp16 x) {
// this is storage for output result // this is storage for output result
uint32_t u = static_cast<uint32_t>(x); uint32_t u = static_cast<uint32_t>(x);

View File

@ -40,7 +40,6 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE
${PUBLIC_HEADERS_DIR} ${PUBLIC_HEADERS_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}/src
${IE_MAIN_SOURCE_DIR}/src/inference_engine # For CNNNetworkNGraphImpl ${IE_MAIN_SOURCE_DIR}/src/inference_engine # For CNNNetworkNGraphImpl
$<TARGET_PROPERTY:inference_engine_snippets,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES> $<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES> $<TARGET_PROPERTY:inference_engine_plugin_api,INTERFACE_INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:ngraph::ngraph,INTERFACE_INCLUDE_DIRECTORIES> $<TARGET_PROPERTY:ngraph::ngraph,INTERFACE_INCLUDE_DIRECTORIES>
@ -61,7 +60,7 @@ add_library(${TARGET_NAME} SHARED
ie_add_vs_version_file(NAME ${TARGET_NAME} ie_add_vs_version_file(NAME ${TARGET_NAME}
FILEDESCRIPTION "Inference Engine Legacy library") FILEDESCRIPTION "Inference Engine Legacy library")
target_link_libraries(${TARGET_NAME} PUBLIC inference_engine inference_engine_snippets target_link_libraries(${TARGET_NAME} PUBLIC inference_engine
PRIVATE pugixml openvino::itt PRIVATE pugixml openvino::itt
${NGRAPH_LIBRARIES} inference_engine_transformations) ${NGRAPH_LIBRARIES} inference_engine_transformations)

View File

@ -24,7 +24,7 @@ public:
LSTMSequenceIE(const Output <Node> &X, LSTMSequenceIE(const Output <Node> &X,
const Output <Node> &H_t, const Output <Node> &H_t,
const Output <Node> &C_t, const Output <Node> &C_t,
const Output <Node> &seq_lenghts, const Output <Node> &seq_lengths,
const Output <Node> &WR, const Output <Node> &WR,
const Output <Node> &B, const Output <Node> &B,
size_t hidden_size, size_t hidden_size,

View File

@ -39,7 +39,6 @@
#include "legacy/ngraph_ops/rnn_sequence_ie.hpp" #include "legacy/ngraph_ops/rnn_sequence_ie.hpp"
#include "legacy/ngraph_ops/lstm_sequence_ie.hpp" #include "legacy/ngraph_ops/lstm_sequence_ie.hpp"
#include "legacy/ngraph_ops/gru_sequence_ie.hpp" #include "legacy/ngraph_ops/gru_sequence_ie.hpp"
#include "snippets/op/subgraph.hpp"
#include "exec_graph_info.hpp" #include "exec_graph_info.hpp"
#include "caseless.hpp" #include "caseless.hpp"
@ -1979,15 +1978,6 @@ void convertFunctionToICNNNetwork(const std::shared_ptr<const ::ngraph::Function
cnnLayer->params[ExecGraphInfoSerialization::ORIGINAL_NAMES] = originalNames; cnnLayer->params[ExecGraphInfoSerialization::ORIGINAL_NAMES] = originalNames;
} }
if (auto subgraph = ::ngraph::as_type_ptr<ngraph::snippets::op::Subgraph>(layer)) {
std::string names = "";
for (const auto& op : subgraph->get_body()->get_ordered_ops()) {
names += ", " + op->get_friendly_name();
}
cnnLayer->params["originalLayersNames"] += names;
}
std::string primitivesPriority = ::ngraph::getPrimitivesPriority(layer); std::string primitivesPriority = ::ngraph::getPrimitivesPriority(layer);
if (!primitivesPriority.empty()) { if (!primitivesPriority.empty()) {
cnnLayer->params["PrimitivesPriority"] = primitivesPriority; cnnLayer->params["PrimitivesPriority"] = primitivesPriority;

View File

@ -16,7 +16,7 @@ NGRAPH_RTTI_DEFINITION(op::GRUSequenceIE, "GRUSequenceIE", 4);
op::GRUSequenceIE::GRUSequenceIE(const Output<Node>& X, op::GRUSequenceIE::GRUSequenceIE(const Output<Node>& X,
const Output<Node>& H_t, const Output<Node>& H_t,
const Output<Node>& seq_lenghts, const Output<Node>& seq_lengths,
const Output<Node>& WR, const Output<Node>& WR,
const Output<Node>& B, const Output<Node>& B,
std::size_t hidden_size, std::size_t hidden_size,
@ -27,7 +27,7 @@ op::GRUSequenceIE::GRUSequenceIE(const Output<Node>& X,
float clip, float clip,
bool linear_before_reset, bool linear_before_reset,
int64_t seq_axis) int64_t seq_axis)
: RNNCellBase({X, H_t, seq_lenghts, WR, B}, hidden_size, clip, activations, activations_alpha, activations_beta), : RNNCellBase({X, H_t, seq_lengths, WR, B}, hidden_size, clip, activations, activations_alpha, activations_beta),
m_direction(direction), m_direction(direction),
m_linear_before_reset(linear_before_reset), m_linear_before_reset(linear_before_reset),
m_seq_axis(seq_axis) { m_seq_axis(seq_axis) {
@ -50,7 +50,7 @@ void op::GRUSequenceIE::validate_and_infer_types() {
auto b_pshape = get_input_partial_shape(4); auto b_pshape = get_input_partial_shape(4);
std::vector<ngraph::PartialShape> pshapes = {x_pshape, h_state_pshape, seq_lengths_pshape, wr_pshape, b_pshape}; std::vector<ngraph::PartialShape> pshapes = {x_pshape, h_state_pshape, seq_lengths_pshape, wr_pshape, b_pshape};
std::vector<std::string> in_names = {"X", "H", "seq_lenghts", "WR", "B"}; std::vector<std::string> in_names = {"X", "H", "seq_lengths", "WR", "B"};
// num_direction dimension should be squeezed, we don't support bidirectional case // num_direction dimension should be squeezed, we don't support bidirectional case
std::vector<size_t> ranks = {3, 2, 1, 2, 1}; std::vector<size_t> ranks = {3, 2, 1, 2, 1};
for (size_t i = 0; i < pshapes.size(); ++i) { for (size_t i = 0; i < pshapes.size(); ++i) {

View File

@ -17,7 +17,7 @@ NGRAPH_RTTI_DEFINITION(op::LSTMSequenceIE, "LSTMSequenceIE", 5);
op::LSTMSequenceIE::LSTMSequenceIE(const Output<Node> &X, op::LSTMSequenceIE::LSTMSequenceIE(const Output<Node> &X,
const Output<Node> &H_t, const Output<Node> &H_t,
const Output<Node> &C_t, const Output<Node> &C_t,
const Output<Node> &seq_lenghts, const Output<Node> &seq_lengths,
const Output<Node> &WR, const Output<Node> &WR,
const Output<Node> &B, const Output<Node> &B,
std::size_t hidden_size, std::size_t hidden_size,
@ -27,7 +27,7 @@ op::LSTMSequenceIE::LSTMSequenceIE(const Output<Node> &X,
const std::vector<float> &activations_beta, const std::vector<float> &activations_beta,
float clip, float clip,
int64_t seq_axis) int64_t seq_axis)
: RNNCellBase({X, H_t, C_t, seq_lenghts, WR, B}, hidden_size, clip, activations, activations_alpha, activations_beta), : RNNCellBase({X, H_t, C_t, seq_lengths, WR, B}, hidden_size, clip, activations, activations_alpha, activations_beta),
m_direction(direction), m_direction(direction),
m_seq_axis(seq_axis) { m_seq_axis(seq_axis) {
constructor_validate_and_infer_types(); constructor_validate_and_infer_types();
@ -52,7 +52,7 @@ void op::LSTMSequenceIE::validate_and_infer_types() {
std::vector<ngraph::PartialShape> pshapes = {x_pshape, h_state_pshape, c_state_pshape, std::vector<ngraph::PartialShape> pshapes = {x_pshape, h_state_pshape, c_state_pshape,
seq_lengths_pshape, wr_pshape, b_pshape}; seq_lengths_pshape, wr_pshape, b_pshape};
std::vector<std::string> in_names = {"X", "H", "C", "seq_lenghts", "WR", "B"}; std::vector<std::string> in_names = {"X", "H", "C", "seq_lengths", "WR", "B"};
// num_direction dimension should be squeezed, we don't support bidirectional case // num_direction dimension should be squeezed, we don't support bidirectional case
std::vector<size_t> ranks = {3, 2, 2, 1, 2, 1}; std::vector<size_t> ranks = {3, 2, 2, 1, 2, 1};
for (size_t i = 0; i < pshapes.size(); ++i) { for (size_t i = 0; i < pshapes.size(); ++i) {

View File

@ -48,7 +48,7 @@ void op::RNNSequenceIE::validate_and_infer_types() {
auto b_pshape = get_input_partial_shape(4); auto b_pshape = get_input_partial_shape(4);
std::vector<ngraph::PartialShape> pshapes = {x_pshape, h_state_pshape, seq_lengths_pshape, wr_pshape, b_pshape}; std::vector<ngraph::PartialShape> pshapes = {x_pshape, h_state_pshape, seq_lengths_pshape, wr_pshape, b_pshape};
std::vector<std::string> in_names = {"X", "H", "seq_lenghts", "WR", "B"}; std::vector<std::string> in_names = {"X", "H", "seq_lengths", "WR", "B"};
// num_direction dimension should be squeezed, we don't support bidirectional case // num_direction dimension should be squeezed, we don't support bidirectional case
std::vector<size_t> ranks = {3, 2, 1, 2, 1}; std::vector<size_t> ranks = {3, 2, 1, 2, 1};
for (size_t i = 0; i < pshapes.size(); ++i) { for (size_t i = 0; i < pshapes.size(); ++i) {

View File

@ -140,6 +140,12 @@ ngraph::matcher_pass_callback get_callback() {
} }
const ngraph::Shape constShape = constant->get_output_shape(0); const ngraph::Shape constShape = constant->get_output_shape(0);
const ngraph::Shape shape = partialShape.to_shape();
if (constShape.size() == 1ul && constShape[0] != 1 && constShape[0] != shape[1]) {
return false;
}
if ((constShape.size() > 5ul)) { if ((constShape.size() > 5ul)) {
return false; return false;
} }
@ -148,7 +154,6 @@ ngraph::matcher_pass_callback get_callback() {
return true; return true;
} }
const ngraph::Shape shape = partialShape.to_shape();
if (constShape.size() == shape.size()) { if (constShape.size() == shape.size()) {
if ((constShape[0] != 1ul) || (constShape[1] != shape[1])) { if ((constShape[0] != 1ul) || (constShape[1] != shape[1])) {
return false; return false;

View File

@ -0,0 +1,27 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <utility>
#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
namespace ngraph {
namespace pass {
namespace low_precision {
class TRANSFORMATIONS_API ConvertSubtractConstant;
} // namespace low_precision
} // namespace pass
} // namespace ngraph
class ngraph::pass::low_precision::ConvertSubtractConstant : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertSubtractConstant(const std::vector<ngraph::element::Type>& constantPrecisions = {});
};

View File

@ -0,0 +1,25 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/ngraph.hpp>
#include "weightable_layer_transformation.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
class TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
public:
ConvolutionBackpropDataTransformation(const Params& params);
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
};
} // namespace low_precision
} // namespace pass
} // namespace ngraph

View File

@ -45,6 +45,13 @@ class TRANSFORMATIONS_API DataPrecision {
public: public:
DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {} DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}
explicit DataPrecision(const element::Type& precision) {
this->precision = precision;
min = getMinValue(precision, 256);
max = getMaxValue(precision, 256);
hasZeroPoint = false;
}
DataPrecision(const element::Type precision, const float min, const float max, const bool hasZeroPoint) : DataPrecision(const element::Type precision, const float min, const float max, const bool hasZeroPoint) :
precision(precision), precision(precision),
min(min), min(min),
@ -70,6 +77,10 @@ public:
return -1.0e15f; return -1.0e15f;
} else if (precision == element::f32) { } else if (precision == element::f32) {
return std::numeric_limits<float>::lowest(); return std::numeric_limits<float>::lowest();
} else if (precision == element::i4) {
return -8.f;
} else if (precision == element::u4) {
return 0.f;
} else { } else {
NGRAPH_CHECK(false, "unexpected precision ", precision); NGRAPH_CHECK(false, "unexpected precision ", precision);
} }
@ -88,6 +99,10 @@ public:
return 1.0e15f; return 1.0e15f;
} else if (precision == element::f32) { } else if (precision == element::f32) {
return std::numeric_limits<float>::max(); return std::numeric_limits<float>::max();
} else if (precision == element::i4) {
return 7.f;
} else if (precision == element::u4) {
return 15.f;
} else { } else {
THROW_TRANSFORMATION_EXCEPTION << "unexpected precision " << precision; THROW_TRANSFORMATION_EXCEPTION << "unexpected precision " << precision;
} }
@ -114,29 +129,6 @@ public:
static element::Type getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) { static element::Type getPrecision(const size_t /* quantizationLevels */, const bool signedInterval) {
return signedInterval ? element::i8 : element::u8; return signedInterval ? element::i8 : element::u8;
} }
static float getMin(const size_t quantizationLevels, const bool signedInterval) {
if (quantizationLevels == 255) {
return signedInterval ? -127.0f : 0.0f;
} else if (quantizationLevels == 256) {
return signedInterval ? -128.0f : 0.0f;
} else {
// THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
// FIXME: not completed
return signedInterval ? -128.0f : 0.0f;
}
}
static float getMax(const size_t quantizationLevels, const bool signedInterval) {
if ((quantizationLevels == 255) || (quantizationLevels == 256)) {
return signedInterval ? 127.0f : 255.0f;
} else {
// THROW_TRANSFORMATION_EXCEPTION << "quantization level " << quantizationLevels << " is not supported";
// FIXME: not completed
// return quantizationLevels - 1.0;
return signedInterval ? 127.0f : 255.0f;
}
}
}; };
inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) { inline bool operator==(const DataPrecision& value1, const DataPrecision& value2) {
@ -173,7 +165,8 @@ public:
std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 }, std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 },
std::vector<element::Type> precisionsOnWeights = { element::i8 }, std::vector<element::Type> precisionsOnWeights = { element::i8 },
element::Type deqPrecision = element::f32, element::Type deqPrecision = element::f32,
bool support3DTensorOnActivations = true) : bool support3DTensorOnActivations = true,
bool deconvolutionSpecificChannelsRatio = false) :
updatePrecisions(updatePrecisions), updatePrecisions(updatePrecisions),
quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations), quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights), quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
@ -181,7 +174,8 @@ public:
precisionsOnActivations(precisionsOnActivations), precisionsOnActivations(precisionsOnActivations),
precisionsOnWeights(precisionsOnWeights), precisionsOnWeights(precisionsOnWeights),
deqPrecision(deqPrecision), deqPrecision(deqPrecision),
support3DTensorOnActivations(support3DTensorOnActivations) { support3DTensorOnActivations(support3DTensorOnActivations),
deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) {
if (precisionsOnActivations.size() == 0ul) { if (precisionsOnActivations.size() == 0ul) {
THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed"; THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
} }
@ -226,6 +220,11 @@ public:
return *this; return *this;
} }
Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) {
this->deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio;
return *this;
}
bool updatePrecisions; bool updatePrecisions;
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations; QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights; QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
@ -234,6 +233,7 @@ public:
std::vector<element::Type> precisionsOnWeights; std::vector<element::Type> precisionsOnWeights;
element::Type deqPrecision; element::Type deqPrecision;
bool support3DTensorOnActivations; bool support3DTensorOnActivations;
bool deconvolutionSpecificChannelsRatio;
}; };
class PrecisionDetails { class PrecisionDetails {
@ -310,6 +310,7 @@ protected:
std::vector<element::Type> precisionsOnWeights; std::vector<element::Type> precisionsOnWeights;
element::Type deqPrecision; element::Type deqPrecision;
bool support3DTensorOnActivations; bool support3DTensorOnActivations;
bool deconvolutionSpecificChannelsRatio;
// absolute value, used to determine quantization interval asymmetry // absolute value, used to determine quantization interval asymmetry
float quantizationIntervalAsymmetryThreshold; float quantizationIntervalAsymmetryThreshold;
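The DataPrecision changes above add 4-bit integer bounds and a single-argument constructor that fills min/max from the precision at 256 quantization levels. A small standalone sketch of the ranges those branches imply (hypothetical re-implementation, for illustration only; the real code uses ngraph::element::Type):

#include <cassert>
#include <string>

struct Range { float min; float max; };

// Mirrors the i4/u4/i8/u8 branches of DataPrecision::getMinValue / getMaxValue.
Range lowPrecisionRange(const std::string& precision) {
    if (precision == "i4") return { -8.f,    7.f };
    if (precision == "u4") return {  0.f,   15.f };
    if (precision == "i8") return { -128.f, 127.f };
    if (precision == "u8") return {  0.f,  255.f };
    return { 0.f, 0.f };
}

int main() {
    // DataPrecision(element::i4) would therefore carry [-8, 7] and hasZeroPoint = false.
    assert(lowPrecisionRange("i4").min == -8.f && lowPrecisionRange("i4").max == 7.f);
    assert(lowPrecisionRange("u4").max == 15.f);
    return 0;
}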

View File

@ -109,7 +109,8 @@ public:
const float max, const float max,
const bool hasZeroPoint, const bool hasZeroPoint,
const bool updatePrecision, const bool updatePrecision,
const element::Type deqPrecision = element::f32); const element::Type deqPrecision = element::f32,
const size_t outChannelsShapeIndex = 0);
static std::shared_ptr<opset1::FakeQuantize> updateFakeQuantize( static std::shared_ptr<opset1::FakeQuantize> updateFakeQuantize(
std::shared_ptr<opset1::FakeQuantize> fq, std::shared_ptr<opset1::FakeQuantize> fq,
@ -183,7 +184,7 @@ public:
static std::shared_ptr<Node> toScalarIfPossible(std::shared_ptr<Node> node); static std::shared_ptr<Node> toScalarIfPossible(std::shared_ptr<Node> node);
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq); static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq);
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues); static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, int outChannelsShapeIndex = 0);
static FakeQuantizeDequantization foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace = false); static FakeQuantizeDequantization foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace = false);
@ -191,8 +192,16 @@ public:
static std::shared_ptr<opset1::FakeQuantize> fuseConvert(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize); static std::shared_ptr<opset1::FakeQuantize> fuseConvert(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize);
static std::vector<element::Type> precisionIntersection(
const std::vector<element::Type>& v1,
const std::vector<element::Type>& v2) noexcept;
private: private:
static std::shared_ptr<Node> foldFakeQuantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues, const bool roundValuesWasSet); static std::shared_ptr<Node> foldFakeQuantize(
const std::shared_ptr<opset1::FakeQuantize>& fq,
const bool roundValues,
const bool roundValuesWasSet,
int outChannelsShapeIndex = 0);
// 1 - on weights // 1 - on weights
// 0 - weightable layer was not found // 0 - weightable layer was not found
@ -255,6 +264,8 @@ std::shared_ptr<Node> fold(Args&&... args) {
return node; return node;
} }
std::shared_ptr<Node> foldConvert(const Output<Node>& node, const element::Type targetPrecision);
template <typename T, typename... Args> template <typename T, typename... Args>
std::shared_ptr<Node> fold_reshape(Args&&... args) { std::shared_ptr<Node> fold_reshape(Args&&... args) {
std::shared_ptr<Node> node = std::make_shared<T>(std::forward<Args>(args)...); std::shared_ptr<Node> node = std::make_shared<T>(std::forward<Args>(args)...);

View File

@ -303,10 +303,6 @@ private:
std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> transformations, std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> transformations,
GraphRewrite& pass, GraphRewrite& pass,
TransformationContext& context); TransformationContext& context);
std::vector<element::Type> precisionIntersection(
const std::vector<element::Type>& v1,
const std::vector<element::Type>& v2) const noexcept;
}; };
class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite { class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite {

View File

@ -22,7 +22,7 @@ public:
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override; bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
protected: protected:
void decomposeFakeQuantizeForWeightsPath(std::shared_ptr<Node> weightableLayer) const; void decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& weightableLayer, size_t outChannelsShapeIndex = 0ul) const;
static bool isGroup(const std::shared_ptr<Node>& node); static bool isGroup(const std::shared_ptr<Node>& node);
static bool isDepthwise(const std::shared_ptr<Node>& node); static bool isDepthwise(const std::shared_ptr<Node>& node);

View File

@ -42,6 +42,7 @@ std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>&
const auto parent = add->get_input_node_shared_ptr(dataBranchIndex); const auto parent = add->get_input_node_shared_ptr(dataBranchIndex);
if (is_type<opset1::Convolution>(parent) || if (is_type<opset1::Convolution>(parent) ||
is_type<opset1::GroupConvolution>(parent) || is_type<opset1::GroupConvolution>(parent) ||
is_type<opset1::ConvolutionBackpropData>(parent) ||
(is_type<opset1::MatMul>(parent) && (is_type<opset1::MatMul>(parent) &&
(is_type<opset1::Constant>(parent->get_input_node_ptr(0)) || is_type<opset1::Constant>(parent->get_input_node_ptr(1))))) { (is_type<opset1::Constant>(parent->get_input_node_ptr(0)) || is_type<opset1::Constant>(parent->get_input_node_ptr(1))))) {
return nullptr; return nullptr;

View File

@ -50,14 +50,14 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
return false; return false;
} }
DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false); std::vector<element::Type> concatParentsChildrensPrecisions = precisionsOnActivations;
if (dataPrecision.precision == ngraph::element::undefined) { fillAvailablePrecisions(subgraph.quantizationLayers[0], concatParentsChildrensPrecisions);
if (concatParentsChildrensPrecisions.empty()) {
return false; return false;
} }
std::unordered_map<std::string, ngraph::pass::low_precision::FakeQuantizeDequantization> dequantizations;
for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) { for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
const std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]); fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
if (fq == nullptr) { if (fq == nullptr) {
return false; return false;
} }
@ -72,21 +72,20 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
if (quantizationDetails.inputHighValues.size() != 1ul) { if (quantizationDetails.inputHighValues.size() != 1ul) {
return false; return false;
} }
std::vector<element::Type> fqChildrensPrecisions = precisionsOnActivations;
fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrensPrecisions);
concatParentsChildrensPrecisions = NetworkHelper::precisionIntersection(concatParentsChildrensPrecisions, fqChildrensPrecisions);
const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false); if (concatParentsChildrensPrecisions.empty()) {
if (dataPrecision2.precision == ngraph::element::undefined) {
return false; return false;
} }
if (dataPrecision.precision != dataPrecision2.precision) {
// quantization levels are the same, difference can be in sign
// wider interval (precision) is preferable: use signed if least one interval is signed
dataPrecision = dataPrecision.precision.is_signed() ? dataPrecision : dataPrecision2;
}
} }
if (dataPrecision.precision == ngraph::element::undefined) { DataPrecision dataPrecision;
return false; if (std::find(concatParentsChildrensPrecisions.begin(), concatParentsChildrensPrecisions.end(), element::i8) != concatParentsChildrensPrecisions.end()) {
dataPrecision = DataPrecision(element::i8);
} else {
dataPrecision = DataPrecision(concatParentsChildrensPrecisions[0]);
} }
std::vector<QuantizationDetails> quantizationLayersDetails; std::vector<QuantizationDetails> quantizationLayersDetails;
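With the changes above, ConcatTransformation no longer takes the data precision of the first FakeQuantize: it intersects the precisions available around every FakeQuantize feeding the concat (via the new NetworkHelper::precisionIntersection) and then prefers i8 when it survives the intersection, otherwise the first remaining precision. A minimal stand-in sketch of that selection rule, using strings instead of ngraph::element::Type (illustrative only):

#include <algorithm>
#include <string>
#include <vector>

// Hypothetical stand-in for NetworkHelper::precisionIntersection declared above.
std::vector<std::string> precisionIntersection(const std::vector<std::string>& v1,
                                               const std::vector<std::string>& v2) {
    std::vector<std::string> result;
    for (const auto& p : v1)
        if (std::find(v2.begin(), v2.end(), p) != v2.end())
            result.push_back(p);
    return result;
}

int main() {
    // One FakeQuantize allows {u8, i8}, another only {i8}: the intersection is {i8}.
    const auto common = precisionIntersection({"u8", "i8"}, {"i8"});
    if (common.empty()) return 1;  // the transform bails out in this case
    // Selection rule from ConcatTransformation::transform: prefer i8 if present.
    const std::string chosen =
        std::find(common.begin(), common.end(), "i8") != common.end() ? "i8" : common.front();
    return chosen == "i8" ? 0 : 1;
}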

View File

@ -27,7 +27,9 @@ bool ConcatMultiChannelsTransformation::isMultiChannel(const std::vector<std::sh
for (const std::shared_ptr<ngraph::opset1::Concat>& concat : concatLayers) { for (const std::shared_ptr<ngraph::opset1::Concat>& concat : concatLayers) {
const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(concat); const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(concat);
for (const std::shared_ptr<ngraph::Node>& child : children) { for (const std::shared_ptr<ngraph::Node>& child : children) {
if (is_type<ngraph::opset1::Convolution>(child.get())) { if ((is_type<ngraph::opset1::Convolution>(child.get()) ||
is_type<ngraph::opset1::ConvolutionBackpropData>(child.get())) &&
this->layerTransformationsManager->isQuantized(child)) {
return false; return false;
} }
} }

View File

@ -0,0 +1,98 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision/convert_subtract_constant.hpp"
#include <memory>
#include <vector>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include "low_precision/network_helper.hpp"
using namespace ngraph;
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConvertSubtractConstant, "ConvertSubtractConstant", 0);
// Original (FP16 as example, I8 in constantPrecisions):
//
// Constant
// | I8
// Convert Constant
// \ FP16 / FP16
// Subtract Constant
// \ FP16 / FP16
// Multiply
//
// Result:
//
// Constant Constant
// | I8 | I8
// Convert Convert
// \ FP16 / FP16
// Subtract Constant
// \ FP16 / FP16
// Multiply
//
ngraph::pass::low_precision::ConvertSubtractConstant::ConvertSubtractConstant(const std::vector<ngraph::element::Type>& constantPrecisions) {
auto weightsConstantWrapper = ngraph::pattern::wrap_type<opset1::Constant>(pattern::consumers_count(1));
auto weightsConvertWrapper = ngraph::pattern::wrap_type<opset1::Convert>({ weightsConstantWrapper }, pattern::consumers_count(1));
auto subtractConstantWrapper = ngraph::pattern::wrap_type<opset1::Constant>(pattern::consumers_count(1));
auto subtractWrapper = ngraph::pattern::wrap_type<opset1::Subtract>({ weightsConvertWrapper, subtractConstantWrapper }, pattern::consumers_count(1));
auto multiplyConstantWrapper = ngraph::pattern::wrap_type<opset1::Constant>(pattern::consumers_count(1));
auto multiplyWrapper = ngraph::pattern::wrap_type<opset1::Multiply>({ subtractWrapper, multiplyConstantWrapper }, pattern::consumers_count(1));
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher & m) -> bool {
const auto& opsMap = m.get_pattern_value_map();
const auto weightsConvert = opsMap.at(weightsConvertWrapper).get_node_shared_ptr();
const auto quantizePrecision = weightsConvert->get_input_element_type(0);
const auto dequantizationPrecision = weightsConvert->get_output_element_type(0);
// validation by Convert operation input precisions
if (!constantPrecisions.empty()) {
const ngraph::element::Type inputPrecision = quantizePrecision;
if (std::find(constantPrecisions.begin(), constantPrecisions.end(), inputPrecision) == constantPrecisions.end()) {
return false;
}
}
const auto subtract = opsMap.at(subtractWrapper).get_node_shared_ptr();
if (!NetworkHelper::checkZeroPoint(subtract)) {
return false;
}
const auto subtractConstant = opsMap.at(subtractConstantWrapper).get_node_shared_ptr();
auto resultSubtractConstant = NetworkHelper::round(subtractConstant, quantizePrecision);
if (NetworkHelper::isScalarLike(resultSubtractConstant)) {
resultSubtractConstant = NetworkHelper::toScalar(resultSubtractConstant);
if (op::util::constantIsEqualTo(resultSubtractConstant, 0.f)) {
resultSubtractConstant = nullptr;
}
}
if (resultSubtractConstant == nullptr) {
const auto multiply = opsMap.at(multiplyWrapper).get_node_shared_ptr();
const auto newMultiply = std::make_shared<opset1::Multiply>(weightsConvert, opsMap.at(multiplyConstantWrapper).get_node_shared_ptr());
NetworkHelper::copyInfo(multiply, newMultiply);
replace_node(multiply, newMultiply);
} else {
NetworkHelper::copyInfo(subtractConstant, resultSubtractConstant);
const auto resultConvert = std::make_shared<opset1::Convert>(resultSubtractConstant, dequantizationPrecision);
NetworkHelper::copyInfo(subtractConstant, resultConvert);
resultConvert->set_friendly_name(subtractConstant->get_friendly_name() + "/Convert");
auto& rtInfo = resultConvert->get_rt_info();
rtInfo["DISABLED_CONSTANT_FOLDING"] = std::make_shared<VariantWrapper<std::string>>("");
const auto newSubtract = std::make_shared<opset1::Subtract>(opsMap.at(weightsConvertWrapper).get_node_shared_ptr(), resultConvert);
NetworkHelper::copyInfo(subtract, newSubtract);
replace_node(subtract, newSubtract);
}
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(multiplyWrapper, "ConvertSubtractConstant");
this->register_matcher(m, callback);
}
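ConvertSubtractConstant, defined above, rewrites the FP16/FP32 Subtract constant on the weights path into a low-precision constant followed by a dedicated Convert that is marked with DISABLED_CONSTANT_FOLDING, or removes the Subtract entirely when the rounded zero point is 0. A minimal registration sketch, assuming the standard ngraph::pass::Manager API (the helper name and the i8-only precision list are illustrative, not taken from this patch):

#include <vector>
#include <ngraph/pass/manager.hpp>
#include "low_precision/convert_subtract_constant.hpp"

// Hypothetical helper: schedules the new matcher pass so that it only touches
// Subtract constants whose weights dequantize from i8.
void registerConvertSubtractConstant(ngraph::pass::Manager& manager) {
    manager.register_pass<ngraph::pass::low_precision::ConvertSubtractConstant>(
        std::vector<ngraph::element::Type>{ ngraph::element::i8 });
}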

View File

@ -42,7 +42,27 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
auto convolution = m.get_match_root(); auto convolution = m.get_match_root();
if (!canConvolutionBeTransformed(context, convolution)) { if (!canConvolutionBeTransformed(context, convolution)) {
return false; auto weightInput = convolution->get_input_node_shared_ptr(1);
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightInput);
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
NetworkHelper::getDequantization(convolution, 1ul) :
NetworkHelper::getDequantization(reshapeFromWeights);
if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(convolution);
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
if (reshapeFromWeights != nullptr) {
resultConstant = fold_reshape<opset1::Reshape>(
resultConstant,
reshapeFromWeights->input_value(1),
false);
}
if (as_type_ptr<opset1::Constant>(resultConstant)) {
replace_node(weightInput, resultConstant);
}
} else {
NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
}
return true;
} }
convolution = NetworkHelper::separateInStandaloneBranch(convolution); convolution = NetworkHelper::separateInStandaloneBranch(convolution);
@ -79,6 +99,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
const auto newSubtract = as_type_ptr<opset1::Subtract>(subtract->clone_with_new_inputs({ const auto newSubtract = as_type_ptr<opset1::Subtract>(subtract->clone_with_new_inputs({
subtract->input_value(0).get_node_shared_ptr(), subtract->input_value(0).get_node_shared_ptr(),
newShift })); newShift }));
NetworkHelper::copyInfo(subtract, newSubtract);
replace_node(subtract, newSubtract); replace_node(subtract, newSubtract);
newSubtract->set_output_type(0, subtract->get_output_element_type(0), newSubtract->get_output_partial_shape(0)); newSubtract->set_output_type(0, subtract->get_output_element_type(0), newSubtract->get_output_partial_shape(0));
@ -203,7 +224,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
reshapeFromWeights : reshapeFromWeights :
multiplyFromWeights->input_value(0) multiplyFromWeights->input_value(0)
}), }),
fold<opset1::Convert>( foldConvert(
fold_reshape<opset1::Reshape>( fold_reshape<opset1::Reshape>(
multiplyFromWeights->input_value(1), multiplyFromWeights->input_value(1),
std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape), std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
@ -230,6 +251,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
auto zeroPointConstant = fold<opset1::Broadcast>( auto zeroPointConstant = fold<opset1::Broadcast>(
subtractFromWeights->get_input_node_shared_ptr(1), subtractFromWeights->get_input_node_shared_ptr(1),
std::make_shared<opset1::Constant>(element::i32, Shape{ zeroPointShape.size() }, zeroPointShape)); std::make_shared<opset1::Constant>(element::i32, Shape{ zeroPointShape.size() }, zeroPointShape));
NetworkHelper::copyInfo(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant); replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
} }
} }

View File

@ -0,0 +1,218 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision/convolution_backprop_data.hpp"
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include <cassert>
#include "low_precision/network_helper.hpp"
#include "low_precision/common/dequantization_op.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
ConvolutionBackpropDataTransformation::ConvolutionBackpropDataTransformation(const Params& params) : WeightableLayerTransformation(params) {
}
void ConvolutionBackpropDataTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
addPattern(
pass,
context,
make_op_pattern<opset1::ConvolutionBackpropData>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>() }));
addPattern(
pass,
context,
make_op_pattern<opset1::ConvolutionBackpropData>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>() }));
addPattern(
pass,
context,
make_op_pattern<opset1::ConvolutionBackpropData>(
{ make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>(), make_op_label<opset1::Constant>() }));
addPattern(
pass,
context,
make_op_pattern<opset1::ConvolutionBackpropData>(
{ make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>(), make_op_label<opset1::Constant>() }));
}
bool ConvolutionBackpropDataTransformation::isQuantized(std::shared_ptr<Node> layer) const noexcept {
if (deconvolutionSpecificChannelsRatio) {
size_t inputChannels = layer->get_input_shape(0)[1];
size_t outputChannels = layer->get_output_shape(0)[1];
if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
return false;
}
}
return WeightableLayerTransformation::isQuantized(layer, false);
}
bool ConvolutionBackpropDataTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const {
auto convolutionBackpropData = m.get_match_root();
if (!canBeTransformed(context, convolutionBackpropData)) {
auto weightsInput = convolutionBackpropData->get_input_node_shared_ptr(1);
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightsInput);
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
NetworkHelper::getDequantization(convolutionBackpropData, 1ul) :
NetworkHelper::getDequantization(reshapeFromWeights);
if (dequantization.empty()) {
const auto fqOnWeights = getFakeQuantizeOnWeights(convolutionBackpropData);
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
if (reshapeFromWeights != nullptr) {
resultConstant = fold_reshape<opset1::Reshape>(
resultConstant,
reshapeFromWeights->input_value(1),
false);
}
if (as_type_ptr<opset1::Constant>(resultConstant)) {
replace_node(weightsInput, resultConstant);
}
} else {
NetworkHelper::foldDequantization(dequantization.multiply, 0, true);
}
return true;
}
convolutionBackpropData = NetworkHelper::separateInStandaloneBranch(convolutionBackpropData);
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
{
if (dequantization.subtract != nullptr) {
std::shared_ptr<ngraph::Node> layer = dequantization.subtract;
ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer);
NetworkHelper::optimizeSubtract(dequantization.subtract);
}
std::shared_ptr<opset1::Constant> reducedConstant = as_type_ptr<opset1::Constant>(dequantization.multiplyConstant);
std::shared_ptr<Node> newMultiplyAfterConst = std::make_shared<opset1::Constant>(
reducedConstant->get_output_element_type(0),
Shape{ 1 },
reducedConstant->cast_vector<float>()[0]);
auto inputs = convolutionBackpropData->input_values();
inputs[0] = dequantization.multiply->input_value(0);
const auto copyNode = convolutionBackpropData->copy_with_new_inputs(inputs);
const auto relaxedConvolutionBackpropData = std::make_shared<op::TypeRelaxed<opset1::ConvolutionBackpropData>>(
*as_type_ptr<opset1::ConvolutionBackpropData>(copyNode),
std::vector<element::Type>{deqPrecision, deqPrecision},
std::vector<element::Type>{deqPrecision});
const auto newMultiplyAfter = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
std::vector<element::Type>{ deqPrecision, deqPrecision },
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
ngraph::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(),
ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
replace_node(convolutionBackpropData, newMultiplyAfter);
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
if (is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
auto newConvolution = convolutionBackpropData->copy_with_new_inputs(inputs);
replace_node(convolutionBackpropData, newConvolution);
convolutionBackpropData = newConvolution;
}
}
{
decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);
dequantization = NetworkHelper::getDequantization(convolutionBackpropData, 1ul);
if (is_type<opset1::FakeQuantize>(dequantization.data.get_node())) {
const std::shared_ptr<opset1::FakeQuantize> fq = as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
std::shared_ptr<ngraph::Node> newFQ = NetworkHelper::fold_fake_quantize(fq, true);
NetworkHelper::copyInfo(fq, newFQ);
replace_node(fq, newFQ);
}
std::shared_ptr<opset1::Multiply> multiplyFromWeights = as_type_ptr<opset1::Multiply>(
convolutionBackpropData->input_value(1).get_node_shared_ptr());
std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));
{
Shape newScaleShape = multiplyFromWeights->get_input_shape(1);
auto inputs = convolutionBackpropData->input_values();
inputs[1] = multiplyFromWeights->input_value(0);
auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
convolutionBackpropData->copy_with_new_inputs(inputs),
foldConvert(
fold_reshape<opset1::Reshape>(
multiplyFromWeights->input_value(1),
std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
false),
convolutionBackpropData->get_output_element_type(0)));
replace_node(convolutionBackpropData, newMultiplyAfter);
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr();
}
if (subtractFromWeights != nullptr) {
// optimize zero point on weights
auto optimizedSubtract = NetworkHelper::optimizeSubtract(subtractFromWeights);
if (optimizedSubtract == nullptr) {
subtractFromWeights = nullptr;
} else {
subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract);
const Shape weightsShape = subtractFromWeights->input(0).get_shape();
Shape zeroPointShape(weightsShape.size(), 1ul);
zeroPointShape[1] = weightsShape[1];
auto zeroPointConstant = fold<opset1::Broadcast>(
subtractFromWeights->get_input_node_shared_ptr(1),
std::make_shared<opset1::Constant>(element::i32, Shape{zeroPointShape.size()}, zeroPointShape));
replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
}
}
std::shared_ptr<opset1::Convert> convertFromWeights =
as_type_ptr<opset1::Convert>(
subtractFromWeights == nullptr ?
multiplyFromWeights->get_input_node_shared_ptr(0) :
subtractFromWeights->get_input_node_shared_ptr(0));
if (convertFromWeights != nullptr) {
auto inputs = convolutionBackpropData->input_values();
inputs[1] = convolutionBackpropData->get_input_node_ptr(1)->input_value(0);
// remove Convert on weights
auto newConvolution = convolutionBackpropData->clone_with_new_inputs(inputs);
replace_node(convolutionBackpropData, newConvolution);
convolutionBackpropData = newConvolution;
}
}
std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter(
convolutionBackpropData->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
ngraph::copy_runtime_info({ convolutionBackpropData, finalDequantization }, finalDequantization);
updateOutput(context, finalDequantization, convolutionBackpropData);
auto onWeights = convolutionBackpropData->get_input_node_shared_ptr(1);
if (is_type<opset1::Reshape>(onWeights)) {
onWeights = onWeights->get_input_node_shared_ptr(0);
}
if (is_type<opset1::Subtract>(onWeights)) {
auto& rt = onWeights->get_rt_info();
rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared<ngraph::VariantWrapper<std::string>>("");
}
return true;
}
bool ConvolutionBackpropDataTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const {
if (deconvolutionSpecificChannelsRatio) {
size_t inputChannels = op->get_input_shape(0)[1];
size_t outputChannels = op->get_output_shape(0)[1];
if (inputChannels % 4 != 0 || outputChannels % 16 != 0) {
return false;
}
}
return canConvolutionBeTransformed(context, op);
}
} // namespace low_precision
} // namespace pass
} // namespace ngraph
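Both isQuantized and canBeTransformed above gate the deconvolution path behind a channel-count heuristic when deconvolutionSpecificChannelsRatio is enabled: input channels must be a multiple of 4 and output channels a multiple of 16, otherwise the layer is left unquantized. A tiny standalone sketch of that guard (hypothetical function name, for illustration only):

#include <cstddef>

// Mirrors the channel-ratio check used by ConvolutionBackpropDataTransformation
// when deconvolutionSpecificChannelsRatio is enabled.
bool deconvChannelsSupported(std::size_t inputChannels, std::size_t outputChannels) {
    return (inputChannels % 4 == 0) && (outputChannels % 16 == 0);
}

int main() {
    // 8 input / 32 output channels pass the heuristic; 6 / 32 does not.
    return deconvChannelsSupported(8, 32) && !deconvChannelsSupported(6, 32) ? 0 : 1;
}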

View File

@ -20,7 +20,7 @@ void FakeQuantizeTransformation::registerMatcherIn(GraphRewrite& pass, Transform
bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
std::shared_ptr<opset1::FakeQuantize> layer = std::dynamic_pointer_cast<opset1::FakeQuantize>(m.get_match_root()); std::shared_ptr<opset1::FakeQuantize> layer = std::dynamic_pointer_cast<opset1::FakeQuantize>(m.get_match_root());
if (!NetworkHelper::isQuantizeSupported(layer)) { if (!QuantizationDetails::outputLayoutIsSupported(layer)) {
return false; return false;
} }
@ -114,15 +114,14 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize) const { const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize) const {
const std::shared_ptr<Node> eltwise = fakeQuantize->get_input_node_shared_ptr(0); const std::shared_ptr<Node> eltwise = fakeQuantize->get_input_node_shared_ptr(0);
std::shared_ptr<Node> inputLowConst_f32 = fold<opset1::Convert>(fakeQuantize->get_input_node_shared_ptr(1), deqPrecision); std::shared_ptr<Node> inputLowConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(1), deqPrecision);
std::shared_ptr<Node> inputHighConst_f32 = fold<opset1::Convert>(fakeQuantize->get_input_node_shared_ptr(2), deqPrecision); std::shared_ptr<Node> inputHighConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(2), deqPrecision);
std::shared_ptr<opset1::Constant> constant = fq::getConstant(eltwise); std::shared_ptr<opset1::Constant> constant = fq::getConstant(eltwise);
if (is_type<opset1::Multiply>(eltwise) && checkElementwise(eltwise)) { if (is_type<opset1::Multiply>(eltwise) && checkElementwise(eltwise)) {
const auto value = constant->get_output_element_type(0) == deqPrecision ? const auto value = constant->get_output_element_type(0) == deqPrecision ?
constant : constant :
fold<opset1::Convert>(constant, deqPrecision); foldConvert(constant, deqPrecision);
const auto valueVec = as_type_ptr<opset1::Constant>(value)->cast_vector<float>(); const auto valueVec = as_type_ptr<opset1::Constant>(value)->cast_vector<float>();
@ -144,19 +143,21 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
} else if (is_type<opset1::Subtract>(eltwise) && checkElementwise(eltwise)) { } else if (is_type<opset1::Subtract>(eltwise) && checkElementwise(eltwise)) {
const auto value = constant->get_output_element_type(0) == deqPrecision ? const auto value = constant->get_output_element_type(0) == deqPrecision ?
constant : constant :
fold<opset1::Convert>(constant, deqPrecision); foldConvert(constant, deqPrecision);
inputLowConst_f32 = fq::updateShape(fold<opset1::Add>(inputLowConst_f32, value), fakeQuantize->get_output_shape(0)); inputLowConst_f32 = fq::updateShape(fold<opset1::Add>(inputLowConst_f32, value), fakeQuantize->get_output_shape(0));
inputHighConst_f32 = fq::updateShape(fold<opset1::Add>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0)); inputHighConst_f32 = fq::updateShape(fold<opset1::Add>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0));
} else if (is_type<opset1::Add>(eltwise) && checkElementwise(eltwise)) { } else if (is_type<opset1::Add>(eltwise) && checkElementwise(eltwise)) {
if (is_type<opset1::Convolution>(fq::getData(eltwise)) || if (is_type<opset1::Convolution>(fq::getData(eltwise)) ||
is_type<opset1::GroupConvolution>(fq::getData(eltwise))) { is_type<opset1::GroupConvolution>(fq::getData(eltwise)) ||
is_type<opset1::ConvolutionBackpropData>(fq::getData(eltwise)) ||
is_type<opset1::GroupConvolutionBackpropData>(fq::getData(eltwise))) {
return nullptr; return nullptr;
} }
const auto value = constant->get_output_element_type(0) == deqPrecision ? const auto value = constant->get_output_element_type(0) == deqPrecision ?
constant : constant :
fold<opset1::Convert>(constant, deqPrecision); foldConvert(constant, deqPrecision);
inputLowConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputLowConst_f32, value), fakeQuantize->get_output_shape(0)); inputLowConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputLowConst_f32, value), fakeQuantize->get_output_shape(0));
inputHighConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0)); inputHighConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputHighConst_f32, value), fakeQuantize->get_output_shape(0));
@ -176,8 +177,8 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
data->output(outputIdx), data->output(outputIdx),
inputLowConst_f32, inputLowConst_f32,
inputHighConst_f32, inputHighConst_f32,
fold<opset1::Convert>(fakeQuantize->input_value(3), deqPrecision), foldConvert(fakeQuantize->input_value(3), deqPrecision),
fold<opset1::Convert>(fakeQuantize->input_value(4), deqPrecision) })); foldConvert(fakeQuantize->input_value(4), deqPrecision) }));
replace_node(fakeQuantize, newFakeQuantize); replace_node(fakeQuantize, newFakeQuantize);
ngraph::copy_runtime_info({ fakeQuantize, eltwise }, newFakeQuantize); ngraph::copy_runtime_info({ fakeQuantize, eltwise }, newFakeQuantize);

View File

@ -22,18 +22,31 @@ bool FoldConvertTransformation::transform(TransformationContext& context, ngraph
return false; return false;
} }
const auto convert = subtract->get_input_node_shared_ptr(1); auto foldConvert = [&](const size_t branch) {
const auto resultConstant = fold<opset1::Convert>(convert->get_input_node_shared_ptr(0), convert->output(0).get_element_type()); const auto convert = subtract->get_input_node_shared_ptr(branch);
if (!is_type<opset1::Convert>(convert) || !is_type<opset1::Constant>(convert->get_input_node_shared_ptr(0))) {
return;
}
const auto resultConstant = ngraph::pass::low_precision::foldConvert(convert->get_input_node_shared_ptr(0), convert->output(0).get_element_type());
assert(is_type<opset1::Constant>(resultConstant));
replace_node(convert, resultConstant); replace_node(convert, resultConstant);
updateOutput(context, resultConstant, convert); updateOutput(context, resultConstant, convert);
};
foldConvert(0ul);
foldConvert(1ul);
return true; return true;
} }
bool FoldConvertTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const { bool FoldConvertTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const {
return return
is_type<opset1::Convert>(operation->get_input_node_ptr(1)) && (is_type<opset1::Convert>(operation->get_input_node_ptr(1)) &&
is_type<opset1::Constant>(operation->get_input_node_ptr(1)->get_input_node_ptr(0)); is_type<opset1::Constant>(operation->get_input_node_ptr(1)->get_input_node_ptr(0))) ||
(is_type<opset1::Convert>(operation->get_input_node_ptr(0)) &&
is_type<opset1::Constant>(operation->get_input_node_ptr(0)->get_input_node_ptr(0)));
} }
bool FoldConvertTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept { bool FoldConvertTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {

View File

@ -60,7 +60,7 @@ bool FuseConvertTransformation::transform(TransformationContext& context, ngraph
std::shared_ptr<Node> parent = convert->get_input_node_shared_ptr(0); std::shared_ptr<Node> parent = convert->get_input_node_shared_ptr(0);
if (is_type<opset1::Constant>(parent)) { if (is_type<opset1::Constant>(parent)) {
auto convertedConstant = fold<opset1::Convert>(parent, convert->get_convert_element_type()); auto convertedConstant = foldConvert(parent, convert->get_convert_element_type());
NetworkHelper::copyInfo(parent, convertedConstant); NetworkHelper::copyInfo(parent, convertedConstant);
replace_node(convert, convertedConstant); replace_node(convert, convertedConstant);
} else { } else {

View File

@@ -102,21 +102,21 @@ std::shared_ptr<opset1::FakeQuantize> FuseFakeQuantizeTransformation::handle(
    if (is_type<opset1::Multiply>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
        const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
            constant :
-            fold<opset1::Convert>(constant, eltwise->get_output_element_type(0));
+            foldConvert(constant, eltwise->get_output_element_type(0));

        inputLowConst = fuse_fq::updateShape(fold<opset1::Divide>(inputLowConst, value), fakeQuantize->get_output_shape(0));
        inputHightConst = fuse_fq::updateShape(fold<opset1::Divide>(inputHightConst, value), fakeQuantize->get_output_shape(0));
    } else if (is_type<opset1::Divide>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
        const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
            constant :
-            fold<opset1::Convert>(constant, eltwise->get_output_element_type(0));
+            foldConvert(constant, eltwise->get_output_element_type(0));

        inputLowConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputLowConst, value), fakeQuantize->get_output_shape(0));
        inputHightConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputHightConst, value), fakeQuantize->get_output_shape(0));
    } else if (is_type<opset1::Subtract>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
        const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
            constant :
-            fold<opset1::Convert>(constant, eltwise->get_output_element_type(0));
+            foldConvert(constant, eltwise->get_output_element_type(0));

        inputLowConst = fuse_fq::updateShape(fold<opset1::Add>(inputLowConst, value), fakeQuantize->get_output_shape(0));
        inputHightConst = fuse_fq::updateShape(fold<opset1::Add>(inputHightConst, value), fakeQuantize->get_output_shape(0));

@@ -128,7 +128,7 @@ std::shared_ptr<opset1::FakeQuantize> FuseFakeQuantizeTransformation::handle(
        const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
            constant :
-            fold<opset1::Convert>(constant, eltwise->get_output_element_type(0));
+            foldConvert(constant, eltwise->get_output_element_type(0));

        inputLowConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputLowConst, value), fakeQuantize->get_output_shape(0));
        inputHightConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputHightConst, value), fakeQuantize->get_output_shape(0));

@@ -32,12 +32,12 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
    const auto multiplyConstant = multiply->get_input_node_shared_ptr(1);

-    auto outputLowConst_f32 = fold<opset1::Convert>(fakeQuantize->get_input_node_shared_ptr(3), deqPrecision);
-    auto outputHighConst_f32 = fold<opset1::Convert>(fakeQuantize->get_input_node_shared_ptr(4), deqPrecision);
+    auto outputLowConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(3), deqPrecision);
+    auto outputHighConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(4), deqPrecision);

    const auto value = multiplyConstant->get_output_element_type(0) == element::f32 ?
        multiplyConstant :
-        fold<opset1::Convert>(multiplyConstant, deqPrecision);
+        foldConvert(multiplyConstant, deqPrecision);

    outputLowConst_f32 = fold<opset1::Multiply>(outputLowConst_f32, value);
    outputHighConst_f32 = fold<opset1::Multiply>(outputHighConst_f32, value);

@@ -45,11 +45,18 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
    const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
    const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);

+    const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
+    const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
+
    auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
        opset1::FakeQuantize(
            fakeQuantizeParent->output(parentIndex),
-            fold<opset1::Convert>(fakeQuantize->input_value(1), deqPrecision),
-            fold<opset1::Convert>(fakeQuantize->input_value(2), deqPrecision),
+            inputLow,
+            inputHigh,
            outputLowConst_f32,
            outputHighConst_f32,
            fakeQuantize->get_levels()),

@@ -32,12 +32,12 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
    const auto subtractConstant = subtract->get_input_node_shared_ptr(1);

-    auto outputLowConst_f32 = fold<opset1::Convert>(fakeQuantize->get_input_node_shared_ptr(3), deqPrecision);
-    auto outputHighConst_f32 = fold<opset1::Convert>(fakeQuantize->get_input_node_shared_ptr(4), deqPrecision);
+    auto outputLowConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(3), deqPrecision);
+    auto outputHighConst_f32 = foldConvert(fakeQuantize->get_input_node_shared_ptr(4), deqPrecision);

    const auto value = subtractConstant->get_output_element_type(0) == element::f32 ?
        subtractConstant :
-        fold<opset1::Convert>(subtractConstant, deqPrecision);
+        foldConvert(subtractConstant, deqPrecision);

    outputLowConst_f32 = fold<opset1::Subtract>(outputLowConst_f32, value);
    outputHighConst_f32 = fold<opset1::Subtract>(outputHighConst_f32, value);

@@ -45,11 +45,18 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
    const auto fakeQuantizeParent = fakeQuantize->get_input_node_shared_ptr(0);
    const size_t parentIndex = NetworkHelper::getParentOutputIndex(fakeQuantizeParent, fakeQuantize);

+    const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
+    const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(1), inputLow);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(2), inputHigh);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
+    NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
+
    auto newFakeQuantize = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
        opset1::FakeQuantize(
            fakeQuantizeParent->output(parentIndex),
-            fold<opset1::Convert>(fakeQuantize->input_value(1), deqPrecision),
-            fold<opset1::Convert>(fakeQuantize->input_value(2), deqPrecision),
+            inputLow,
+            inputHigh,
            outputLowConst_f32,
            outputHighConst_f32,
            fakeQuantize->get_levels()),

@@ -76,7 +83,8 @@ bool FuseSubtractToFakeQuantizeTransformation::canBeTransformed(const Transforma
    for (const auto& target : children) {
        const auto convolution = is_type<opset1::Convolution>(target.get_node());
        const auto groupConvolution = is_type<opset1::GroupConvolution>(target.get_node());
-        if (convolution || groupConvolution) {
+        const auto convolutionBackpropData = is_type<opset1::ConvolutionBackpropData>(target.get_node());
+        if (convolution || groupConvolution || convolutionBackpropData) {
            return false;
        }
    }

@@ -32,6 +32,7 @@ LayerTransformation::LayerTransformation(const Params& params) :
    precisionsOnWeights(params.precisionsOnWeights),
    deqPrecision(params.deqPrecision),
    support3DTensorOnActivations(params.support3DTensorOnActivations),
+    deconvolutionSpecificChannelsRatio(params.deconvolutionSpecificChannelsRatio),
    quantizationIntervalAsymmetryThreshold(0.002f),
    zeroThreshold(1.e-6f),
    minQuantizationLevels(2ul),

@@ -80,7 +80,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
        // multiply by weights: [1, ..., 1, Y] x [Y, Z] => [1, ..., 1, Z]
        const auto newSubConst = NetworkHelper::toScalarIfPossible(fold<opset1::MatMul>(
            broadcastedConst,
-            fold<opset1::Convert>(newMatMul->get_input_node_shared_ptr(1), newMatMul->get_element_type()),
+            foldConvert(newMatMul->get_input_node_shared_ptr(1), newMatMul->get_element_type()),
            newMatMul->get_transpose_a(),
            newMatMul->get_transpose_b()));

@@ -128,7 +128,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
    const auto newMulConst = NetworkHelper::toScalarIfPossible(fold<ngraph::opset1::Multiply>(
        mulConst1,
-        fold<opset1::Convert>(mulConst2, element::f32)));
+        foldConvert(mulConst2, element::f32)));

    const auto newMultiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
        std::vector<element::Type>{ deqPrecision, deqPrecision },

@@ -74,8 +74,8 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p
            ngraph::op::TemporaryReplaceOutputType(multiplyParentParent, element::f32).get(),
            ngraph::op::TemporaryReplaceOutputType(
                fold<opset1::Multiply>(
-                    fold<opset1::Convert>(multiplyParentConst, element::f32),
-                    fold<opset1::Convert>(constParent, element::f32)),
+                    foldConvert(multiplyParentConst, element::f32),
+                    foldConvert(constParent, element::f32)),
                element::f32).get());

        NetworkHelper::copyInfo(multiplyParent.get_node_shared_ptr(), newMultiply);

@@ -91,7 +91,7 @@ bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext&
    if (dequantization.subtract != nullptr) {
        lastNode = std::make_shared<opset1::Add>(
            convolution,
-            fold<opset1::Negative>(fold<opset1::Convert>(dequantization.subtractConstant, element::f32)));
+            fold<opset1::Negative>(foldConvert(dequantization.subtractConstant, element::f32)));
        lastNode->set_friendly_name(convolution->get_friendly_name() + "/Add");
    }

@@ -69,7 +69,8 @@ bool NetworkHelper::isConstantPath(const std::shared_ptr<Node>& op) {
        return is_type<opset1::Parameter>(node) ||
            is_type<opset1::Convolution>(node) ||
            is_type<opset1::GroupConvolution>(node) ||
-            is_type<opset1::MatMul>(node);
+            is_type<opset1::MatMul>(node) ||
+            is_type<opset1::ConvolutionBackpropData>(node);
    };

    if (isNotConstantPathOperation(op)) {

@@ -262,11 +263,11 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
            aBroadcasted ? b->get_output_shape(0) : a->get_output_shape(0),
            bDivAValues);
    } else {
-        b = fold<opset1::Convert>(b, element::f32);
-        a = fold<opset1::Convert>(a, element::f32);
+        b = foldConvert(b, element::f32);
+        a = foldConvert(a, element::f32);
        bDivA = fold<opset1::Divide>(b, a);
        // TODO: issue #49868
-        bDivA = fold<opset1::Convert>(bDivA, a->get_output_element_type(0));
+        bDivA = foldConvert(bDivA, a->get_output_element_type(0));
    }

    OutputVector inputs{ {}, {} };

@@ -440,8 +441,11 @@ std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(const std::shared_ptr<op
    return foldFakeQuantize(fq, false, false);
}

-std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues) {
-    return foldFakeQuantize(fq, roundValues, true);
+std::shared_ptr<Node> NetworkHelper::fold_fake_quantize(
+    const std::shared_ptr<opset1::FakeQuantize>& fq,
+    const bool roundValues,
+    const int outChannelsShapeIndex) {
+    return foldFakeQuantize(fq, roundValues, true, outChannelsShapeIndex);
}

FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_ptr<Node>& node, const size_t branchIndex, const bool inPlace) {

@@ -451,7 +455,7 @@ FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_p
    }

    if (dequantization.convert != nullptr) {
-        const std::shared_ptr<Node> result = fold<opset1::Convert>(dequantization.data, dequantization.convert->get_element_type());
+        const std::shared_ptr<Node> result = foldConvert(dequantization.data, dequantization.convert->get_element_type());
        if (is_type<opset1::Constant>(result)) {
            if (inPlace) {
                copyInfo(dequantization.convert, result);

@@ -467,7 +471,7 @@ FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_p
    }

    if (dequantization.subtractConvert != nullptr) {
-        const auto convertionResult = fold<opset1::Convert>(
+        const auto convertionResult = foldConvert(
            dequantization.subtractConstant,
            dequantization.subtractConvert->get_element_type());
        if (is_type<opset1::Constant>(convertionResult)) {

@@ -502,7 +506,7 @@ FakeQuantizeDequantization NetworkHelper::foldDequantization(const std::shared_p
            return dequantization;
        }
        if (dequantization.multiply->get_output_element_type(0) != result->get_element_type()) {
-            result = fold<opset1::Convert>(result, dequantization.multiply->get_output_element_type(0));
+            result = foldConvert(result, dequantization.multiply->get_output_element_type(0));
        }
        if (inPlace) {
            copyInfo(dequantization.multiply, result);

@@ -591,7 +595,8 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::fuseConvert(const std::shar
std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
    const std::shared_ptr<opset1::FakeQuantize>& fq,
    const bool roundValuesArg,
-    const bool roundValuesWasSet) {
+    const bool roundValuesWasSet,
+    const int outChannelsShapeIndex) {
    if (is_type<opset1::Constant>(fq->get_input_node_shared_ptr(0)) &&
        is_type<opset1::Constant>(fq->get_input_node_shared_ptr(1)) &&
        is_type<opset1::Constant>(fq->get_input_node_shared_ptr(2)) &&

@@ -609,16 +614,16 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
        if (type1.is_real() && !type2.is_real()) {
            return fold<opset1::Add>(
                fq->input_value(0),
-                fold<opset1::Convert>(fq->input_value(3), type1));
+                foldConvert(fq->input_value(3), type1));
        }
        if (!type1.is_real() && type2.is_real()) {
            return fold<opset1::Add>(
-                fold<opset1::Convert>(fq->input_value(0), type2),
+                foldConvert(fq->input_value(0), type2),
                fq->input_value(3));
        }
        return fold<opset1::Add>(
-            fold<opset1::Convert>(fq->input_value(0), element::f32),
-            fold<opset1::Convert>(fq->input_value(3), element::f32));
+            foldConvert(fq->input_value(0), element::f32),
+            foldConvert(fq->input_value(3), element::f32));
    }

    auto constant = as_type_ptr<opset1::Constant>(fq->get_input_node_shared_ptr(0));
@@ -630,10 +635,20 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
        if (constShape.empty() || constShape.size() > 5lu) {
            THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size();
        }
+        if (outChannelsShapeIndex != 0 && outChannelsShapeIndex != 1) {
+            THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected outChannelsShapeIndex " << outChannelsShapeIndex;
+        }

-        // OIDHW
-        const size_t OC = constShape[0];
-        const size_t IC = constShape.size() > 1lu ? constShape[1] : 1;
+        size_t OC;
+        size_t IC;
+        // OIDHW or IODHW
+        if (constShape.size() == 1) {
+            OC = constShape[0];
+            IC = 1;
+        } else {
+            OC = constShape[outChannelsShapeIndex];
+            IC = constShape[outChannelsShapeIndex == 0 ? 1 : 0];
+        }
        const size_t D = constShape.size() > 4lu ? constShape[constShape.size() - 3] : 1;
        const size_t H = constShape.size() > 2lu ? constShape.size() == 3lu ? constShape[2] : constShape[constShape.size() - 2] : 1;
        const size_t W = constShape.size() > 3lu ? constShape[constShape.size() - 1] : 1;

@@ -667,20 +682,25 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
        auto levels_1 = fq->get_levels() - 1.f;

-        //const size_t DHW = D * H * W;
+        const size_t DHW = D * H * W;
        const size_t IDHW = IC * D * H * W;

        const auto values = constant->cast_vector<float>();
        std::vector<float> quantizedValues(OC * IC * D * H * W);

        for (size_t oc = 0; oc < OC; ++oc) {
-            for (size_t iidx = 0; iidx < IDHW; ++iidx) {
            const float inputLow = inputLowValues[isInputLowBroadcasted ? 0 : oc];
            const float inputHigh = inputHighValues[isInputHighBroadcasted ? 0 : oc];
            const float outputLow = outputLowValues[isOutputLowBroadcasted ? 0 : oc];
            const float outputHigh = outputHighValues[isOutputHighBroadcasted ? 0 : oc];

-                const size_t idx = oc * IDHW + iidx;
+            for (size_t ic = 0; ic < IC; ++ic) {
+                for (size_t iidx = 0; iidx < DHW; ++iidx) {
+                    size_t idx;
+                    if (outChannelsShapeIndex == 0) {
+                        idx = oc * IDHW + ic * DHW + iidx;
+                    } else {
+                        idx = ic * IDHW + oc * DHW + iidx;
+                    }

                    if (values[idx] <= inputLow) {
                        quantizedValues[idx] = roundValues ? std::roundf(outputLow) : outputLow;

@@ -693,6 +713,7 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
                    }
                }
            }
+        }

        return std::make_shared<opset1::Constant>(fq->get_output_element_type(0), constShape, quantizedValues);
    }
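
The new outChannelsShapeIndex argument exists because Convolution weights keep output channels in the first dimension (OIDHW) while ConvolutionBackpropData weights keep them in the second (IODHW), so the flat offset of a given weight element differs between the two layouts. A generic, standalone illustration of the two row-major offsets (independent of the helper above; names and sizes are illustrative only):

#include <cstddef>

// Flat offset of weight element (oc, ic, s), where s indexes the D*H*W spatial block.
// OIDHW keeps output channels outermost; IODHW keeps input channels outermost.
size_t offsetOIDHW(size_t oc, size_t ic, size_t s, size_t IC, size_t DHW) {
    return (oc * IC + ic) * DHW + s;
}

size_t offsetIODHW(size_t oc, size_t ic, size_t s, size_t OC, size_t DHW) {
    return (ic * OC + oc) * DHW + s;
}
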
@@ -755,7 +776,7 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::composeFakeQuantize(const s
    if (dequantization.subtract != nullptr) {
        const auto subtractValue = (dequantization.subtractConvert == nullptr) ?
            dequantization.subtractConstant :
-            fold<opset1::Convert>(dequantization.subtractConstant, dequantization.subtractConvert->output(0).get_element_type());
+            foldConvert(dequantization.subtractConstant, dequantization.subtractConvert->output(0).get_element_type());

        const std::shared_ptr<opset1::FakeQuantize> replacement = std::make_shared<op::TypeRelaxed<opset1::FakeQuantize>>(
            newFakeQuantize->input_value(0),

@@ -782,11 +803,11 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::composeFakeQuantize(const s
    assert((precision2.is_real() == precision1.is_real()) && (precision2.bitwidth() >= precision1.bitwidth()));

    auto output = fold<opset1::Multiply>(
-        precision2 != precision1 ? fold<opset1::Convert>(value1, precision2) : value1,
+        precision2 != precision1 ? foldConvert(value1, precision2) : value1,
        value2);

    if (output->output(0).get_element_type() != precision1) {
-        output = fold<opset1::Convert>(output, precision1);
+        output = foldConvert(output, precision1);
    }

    return output;

@@ -818,7 +839,8 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
    const float max,
    const bool hasZeroPoint,
    const bool updatePrecision,
-    const element::Type deqPrecision) {
+    const element::Type deqPrecision,
+    const size_t outChannelsShapeIndex) {
    using std::make_shared;

    const auto outputLow = fq->input_value(3);

@@ -898,7 +920,8 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
            newMax->output(0),
            fq->get_levels(),
            fq->get_auto_broadcast()),
-        true);
+        true,
+        outChannelsShapeIndex);
    NetworkHelper::copyInfo(fq, newFQ);

    std::shared_ptr<ngraph::Node> convert2;

@@ -907,7 +930,7 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
        std::shared_ptr<opset1::Constant> newFqConstant = as_type_ptr<opset1::Constant>(newFQ);

        if (is_type<opset1::Constant>(newFQ)) {
-            convert = fold<opset1::Convert>(newFQ, precision);
+            convert = foldConvert(newFQ, precision);
        } else if (is_type<opset1::FakeQuantize>(newFQ)) {
            newFQ = setOutDataPrecision(as_type_ptr<opset1::FakeQuantize>(newFQ), precision);
            convert = newFQ;

@@ -1032,13 +1055,13 @@ FakeQuantizeDequantization NetworkHelper::createDequantizationFromFakeQuantize(
    // TODO: threshold values have to used here to avoid shifts

-    const std::shared_ptr<opset1::Constant> scale = as_type_ptr<opset1::Constant>(fold<opset1::Convert>(fold<opset1::Divide>(
+    const std::shared_ptr<opset1::Constant> scale = as_type_ptr<opset1::Constant>(foldConvert(fold<opset1::Divide>(
        fold<opset1::Subtract>(outputHigh, outputLow),
        fold<opset1::Subtract>(newMax, newMin)), deqPrecision));
    assert(scale != nullptr);

    std::shared_ptr<opset1::Constant> shift = hasZeroPoint ?
-        as_type_ptr<opset1::Constant>(fold<opset1::Convert>(fold<opset1::Divide>(
+        as_type_ptr<opset1::Constant>(foldConvert(fold<opset1::Divide>(
            fold<opset1::Subtract>(fold<opset1::Multiply>(newMin, outputHigh), fold<opset1::Multiply>(newMax, outputLow)),
            fold<opset1::Subtract>(outputHigh, outputLow)), deqPrecision)) :
        nullptr;

@@ -1298,7 +1321,7 @@ FakeQuantizeDequantizationValues NetworkHelper::createEmptyValues(const FakeQuan
    std::shared_ptr<Node> subtract1Const = dequantization.subtract ?
        (dequantization.subtractConvert == nullptr ?
            dequantization.subtractConstant->clone_with_new_inputs({}) :
-            fold<opset1::Convert>(dequantization.subtractConstant, dequantization.subtractConvert->get_element_type())) :
+            foldConvert(dequantization.subtractConstant, dequantization.subtractConvert->get_element_type())) :
        std::make_shared<opset1::Constant>(parent->get_output_element_type(0), Shape({}), std::vector<float>({ 0.f }));

    subtract1Const->set_output_type(0, multiply1Const->get_output_element_type(0), subtract1Const->get_output_partial_shape(0));
@@ -1357,6 +1380,8 @@ std::shared_ptr<Node> NetworkHelper::optimizeSubtract(std::shared_ptr<opset1::Su
        }

        if (roundedShift) {
+            NetworkHelper::copyInfo(shift, roundedShift);
+
            // Propagate convertInputType down
            replacement = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(data, roundedShift);
            NetworkHelper::copyInfo(subtract, replacement);

@@ -1364,22 +1389,10 @@ std::shared_ptr<Node> NetworkHelper::optimizeSubtract(std::shared_ptr<opset1::Su
            replace_node(subtract, replacement);
        }

-        // We lose the tail conversion here; not needed if the next node is a TypeRelaxed
-        // TODO: check cases when Convert should be preserved
-        // Try to optimize Add out if constant is zero
-        // TODO: don't remove operation here: don't create this Subtraction operation in FQ decomposition
-        // if (isScalarLike(roundedShift)) {
-        //     auto scalar = distillToScalar(roundedShift);
-        //     if (op::util::constantIsEqualTo(scalar, 0)) {
-        //         replace_node(replacement, replacement->input_value(0).get_node_shared_ptr());
-        //         replacement = nullptr;
-        //     }
-        // }
        return replacement;
-    } else if (is_type<opset1::Convert>(subtractParent) || is_type<opset1::Constant>(subtractParent->get_input_node_shared_ptr(0))) {
+    } else if (is_type<opset1::Convert>(subtractParent) && is_type<opset1::Constant>(subtractParent->get_input_node_shared_ptr(0))) {
        auto replacement = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(data, subtractParent->get_input_node_shared_ptr(0));
+        NetworkHelper::copyInfo(subtract, replacement);
        NetworkHelper::setOutDataPrecisionForTypeRelaxed(replacement, convertOutputType);
        replace_node(subtract, replacement);
        return replacement;

@@ -1453,7 +1466,7 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
                parent,
                dequantization.subtractConstant->output(0).get_element_type() == parentPrecision ?
                    dequantization.subtractConstant :
-                    fold<opset1::Convert>(dequantization.subtractConstant, parentPrecision));
+                    foldConvert(dequantization.subtractConstant, parentPrecision));
            ngraph::copy_runtime_info({ newOperation, parent }, parent);
        } else {
            parent = std::make_shared<DequantizationSubtract>(parent, dequantization.subtractConvert);

@@ -1474,7 +1487,7 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
            DequantizationMultiply(parent,
                multiplyConstant->output(0).get_element_type() == parentPrecision ?
                    multiplyConstant :
-                    fold<opset1::Convert>(multiplyConstant->output(0), parentPrecision)),
+                    foldConvert(multiplyConstant->output(0), parentPrecision)),
            dequantization.multiply->get_output_element_type(0));
        ngraph::copy_runtime_info({ newOperation, parent }, parent);
    }

@@ -1541,6 +1554,14 @@ std::shared_ptr<Node> NetworkHelper::toScalarIfPossible(std::shared_ptr<Node> no
    return NetworkHelper::toScalar(constant);
}

+std::shared_ptr<Node> foldConvert(const Output<Node>& node, const element::Type targetPrecision) {
+    if (is_type<opset1::Constant>(node.get_node_shared_ptr()) && (node.get_element_type() == targetPrecision)) {
+        return node.get_node_shared_ptr();
+    }
+
+    return fold<opset1::Convert>(node, targetPrecision);
+}
+
bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const DataPrecision& dataPrecision) {
    if (!node) {
        return true;
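
foldConvert, added above, is why the fold<opset1::Convert>(...) call sites in this commit could be replaced mechanically: it behaves like fold<opset1::Convert> except that a Constant which already has the target precision is returned unchanged, so no redundant constant folding is performed. A minimal usage sketch, assuming the declaration accompanying the definition above lives in low_precision/network_helper.hpp:

#include "low_precision/network_helper.hpp"
#include <ngraph/opsets/opset1.hpp>

void foldConvertSketch() {
    using namespace ngraph;
    const auto constant = std::make_shared<opset1::Constant>(element::f32, Shape({}), std::vector<float>({ 0.5f }));

    // Already f32: the constant itself is returned, nothing is folded.
    const auto same = pass::low_precision::foldConvert(constant, element::f32);

    // Different precision: equivalent to fold<opset1::Convert>(constant, element::f16).
    const auto converted = pass::low_precision::foldConvert(constant, element::f16);
}
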
@@ -1550,12 +1571,12 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
    if (is_type<opset1::Subtract>(node)) {
        const auto parent = node->get_input_node_shared_ptr(0);
        const auto intNode = is_type<opset1::Convert>(parent) ? parent : node;
-        const auto intType = intNode->get_input_element_type(0);
-        if (intType == element::u8 || intType == element::i8) {
-            min = DataPrecision::getMinValue(intType, 256) - 0.5f;
-            max = DataPrecision::getMaxValue(intType, 256) + 0.5f;
+        const auto type = intNode->get_input_element_type(0);
+        if (type == element::u8 || type == element::i8) {
+            min = DataPrecision::getMinValue(type, 256) - 0.5f;
+            max = DataPrecision::getMaxValue(type, 256) + 0.5f;
        } else {
-            return false;
+            return type == element::f32 || type == element::f16;
        }
        auto subtract1input = node->get_input_node_shared_ptr(1);
        if (is_type<opset1::Convert>(subtract1input)) {

@@ -1597,6 +1618,23 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr<Node>& node, const Data
    return true;
}

+std::vector<element::Type> NetworkHelper::precisionIntersection(
+    const std::vector<element::Type>& v1,
+    const std::vector<element::Type>& v2) noexcept {
+    std::vector<element::Type> v3;
+
+    auto v1Copy = v1;
+    auto v2Copy = v2;
+
+    std::sort(v1Copy.begin(), v1Copy.end());
+    std::sort(v2Copy.begin(), v2Copy.end());
+    std::set_intersection(v1Copy.begin(), v1Copy.end(),
+        v2Copy.begin(), v2Copy.end(),
+        std::back_inserter(v3));
+
+    return v3;
+}
+
} // namespace low_precision
} // namespace pass
} // namespace ngraph
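
precisionIntersection used to be a private helper of LowPrecisionTransformer (its old definition is removed further below in transformer.cpp) and is now a static member of NetworkHelper: it sorts copies of the two precision lists and returns their common subset. A small usage sketch (the element types are arbitrary examples):

#include <vector>
#include "low_precision/network_helper.hpp"

void precisionIntersectionSketch() {
    const std::vector<ngraph::element::Type> onActivations = { ngraph::element::u8, ngraph::element::i8 };
    const std::vector<ngraph::element::Type> supported = { ngraph::element::u8 };

    // result == { ngraph::element::u8 }
    const auto result = ngraph::pass::low_precision::NetworkHelper::precisionIntersection(onActivations, supported);
}
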

@@ -23,13 +23,14 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& reshap
    const auto reshapeValues = reshape->get_input_node_shared_ptr(1);
    NGRAPH_CHECK(reshapeValues != nullptr, "Reshape constant was not found");

-    const auto constantIndex = ngraph::pass::low_precision::NetworkHelper::getConstantInputIndex(elementwise);
-    NGRAPH_CHECK(constantIndex != -1);
-    const auto elementwiseValues = elementwise->get_input_node_shared_ptr(constantIndex);
+    auto elementwiseValuesConvert = as_type_ptr<opset1::Convert>(elementwise->get_input_node_shared_ptr(1ul));
+    auto elementwiseValues = elementwiseValuesConvert == nullptr ?
+        elementwise->get_input_node_shared_ptr(1ul) :
+        elementwiseValuesConvert->get_input_node_shared_ptr(0ul);
+    assert(is_type<opset1::Constant>(elementwiseValues));

    const std::shared_ptr<opset1::Reshape> newReshape = as_type_ptr<opset1::Reshape>(reshape->clone_with_new_inputs({
-        elementwise->get_input_node_shared_ptr(constantIndex == 1 ? 0ul : 1ul),
+        elementwise->get_input_node_shared_ptr(0ul),
        reshapeValues }));

    std::shared_ptr<Node> newElementwiseValues;

@@ -54,10 +55,15 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& reshap
            elementwiseValues->output(0),
            newReshapeValues->output(0),
            as_type_ptr<opset1::Reshape>(reshape)->get_special_zero());
+        assert(is_type<opset1::Constant>(newElementwiseValues));
    } else {
        newElementwiseValues = elementwiseValues;
    }

-    const auto newElementwise = elementwise->clone_with_new_inputs({ newReshape, newElementwiseValues });
+    const auto newElementwise = elementwise->clone_with_new_inputs({
+        newReshape,
+        elementwiseValuesConvert == nullptr ?
+            newElementwiseValues :
+            std::make_shared<opset1::Convert>(newElementwiseValues, elementwiseValuesConvert->get_destination_type()) });

    replace_node(reshape, newElementwise);
    copy_runtime_info({ elementwise, reshape }, { newReshape, newElementwise });
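
The helper now tolerates a Convert sitting on top of the elementwise constant: the fold is applied to the underlying Constant and the Convert is re-created over the folded result, so the dequantization subgraph keeps its original precisions. A self-contained sketch of that unwrap/fold/re-wrap idiom (the function name and arguments are illustrative, not part of the patch):

#include <memory>
#include "low_precision/network_helper.hpp"
#include <ngraph/opsets/opset1.hpp>

// constantBranch is either Constant or Convert(Constant); newShape is the Reshape pattern.
std::shared_ptr<ngraph::Node> foldConstantBranch(
        const std::shared_ptr<ngraph::Node>& constantBranch,
        const std::shared_ptr<ngraph::opset1::Constant>& newShape) {
    using namespace ngraph;
    const auto convertOnTop = as_type_ptr<opset1::Convert>(constantBranch);
    const auto constant = convertOnTop == nullptr ? constantBranch : convertOnTop->get_input_node_shared_ptr(0);

    // Fold the constant through the Reshape, much as the pass does above.
    const auto folded = pass::low_precision::fold<opset1::Reshape>(constant->output(0), newShape->output(0), false);

    // Restore the Convert (and therefore the original destination precision) if one was present.
    return convertOnTop == nullptr
        ? folded
        : std::make_shared<opset1::Convert>(folded, convertOnTop->get_destination_type());
}
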
@@ -87,8 +93,12 @@ ngraph::pass::low_precision::PullReshapeThroughDequantization::PullReshapeThroug
    const std::vector<ngraph::element::Type>& inputPrecisions) {
    const auto weights = ngraph::pattern::wrap_type<ngraph::opset1::Constant>(pattern::type_matches_any(inputPrecisions));
    const auto convert = ngraph::pattern::wrap_type<ngraph::opset1::Convert>({ weights });
-    const auto subtractConvert = ngraph::pattern::wrap_type<ngraph::opset1::Constant>();
-    const auto subtract = ngraph::pattern::wrap_type<ngraph::opset1::Subtract>({ convert, subtractConvert });
+
+    const auto subtractValues = std::make_shared<pattern::op::Or>(OutputVector{
+        ngraph::pattern::wrap_type<ngraph::opset1::Constant>(),
+        ngraph::pattern::wrap_type<ngraph::opset1::Convert>({ngraph::pattern::wrap_type<ngraph::opset1::Constant>()})
+    });
+    const auto subtract = ngraph::pattern::wrap_type<ngraph::opset1::Subtract>({ convert, subtractValues });

    const auto subtractOrConvert = std::make_shared<pattern::op::Or>(OutputVector{ convert, subtract });
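
With the pattern::op::Or alternative, the matcher accepts the zero-point branch of the weights dequantization either as a bare Constant or as Convert(Constant), i.e. both Subtract(Convert(weights), zeroPoint) and Subtract(Convert(weights), Convert(zeroPoint)) are pulled through. The same pattern reduced to a standalone sketch (assuming the usual ngraph pattern headers):

#include <memory>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

// Matches Subtract(Convert(Constant), Constant) as well as Subtract(Convert(Constant), Convert(Constant)).
std::shared_ptr<ngraph::Node> makeWeightsSubtractPattern() {
    using namespace ngraph;
    const auto weights = pattern::wrap_type<opset1::Constant>();
    const auto convert = pattern::wrap_type<opset1::Convert>({ weights });
    const auto zeroPoint = std::make_shared<pattern::op::Or>(OutputVector{
        pattern::wrap_type<opset1::Constant>(),
        pattern::wrap_type<opset1::Convert>({ pattern::wrap_type<opset1::Constant>() })
    });
    return pattern::wrap_type<opset1::Subtract>({ convert, zeroPoint });
}
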

@@ -24,10 +24,12 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& transp
    const auto transposeValues = transpose->get_input_node_shared_ptr(1);
    NGRAPH_CHECK(transposeValues != nullptr, "transpose constant was not found");

-    const auto constantIndex = ngraph::pass::low_precision::NetworkHelper::getConstantInputIndex(elementwise);
-    NGRAPH_CHECK(constantIndex != -1);
-    auto elementwiseValues = elementwise->get_input_node_shared_ptr(constantIndex);
+    auto elementwiseValuesConvert = as_type_ptr<opset1::Convert>(elementwise->get_input_node_shared_ptr(1ul));
+    auto elementwiseValues = elementwiseValuesConvert == nullptr ?
+        elementwise->get_input_node_shared_ptr(1ul) :
+        elementwiseValuesConvert->get_input_node_shared_ptr(0ul);
+    assert(is_type<opset1::Constant>(elementwiseValues));
    const auto transposeValuesShape = transposeValues->output(0).get_shape();
    const auto elementwiseValuesShape = elementwiseValues->output(0).get_shape();
    if (elementwiseValuesShape.size() != shape_size(transposeValuesShape)) {

@@ -45,7 +47,7 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& transp
    }

    const std::shared_ptr<opset1::Transpose> newTranspose = as_type_ptr<opset1::Transpose>(transpose->clone_with_new_inputs({
-        elementwise->get_input_node_shared_ptr(constantIndex == 1 ? 0ul : 1ul),
+        elementwise->get_input_node_shared_ptr(0ul),
        transposeValues }));

    const auto newElementwiseValues = ngraph::pass::low_precision::fold<opset1::Transpose>(

@@ -53,7 +55,11 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& transp
        transposeValues->output(0));
    assert(is_type<opset1::Constant>(newElementwiseValues));

-    const auto newElementwise = elementwise->clone_with_new_inputs({ newTranspose, newElementwiseValues });
+    const auto newElementwise = elementwise->clone_with_new_inputs({
+        newTranspose,
+        elementwiseValuesConvert == nullptr ?
+            newElementwiseValues :
+            std::make_shared<opset1::Convert>(newElementwiseValues, elementwiseValuesConvert->get_destination_type()) });

    replace_node(transpose, newElementwise);
    copy_runtime_info({ elementwise, transpose }, { newTranspose, newElementwise });

@@ -85,8 +91,12 @@ ngraph::pass::low_precision::PullTransposeThroughDequantization::PullTransposeTh
    const std::vector<ngraph::element::Type>& inputPrecisions) {
    const auto weights = ngraph::pattern::wrap_type<ngraph::opset1::Constant>(pattern::type_matches_any(inputPrecisions));
    const auto convert = ngraph::pattern::wrap_type<ngraph::opset1::Convert>({ weights });
-    const auto subtractConvert = ngraph::pattern::wrap_type<ngraph::opset1::Constant>();
-    const auto subtract = ngraph::pattern::wrap_type<ngraph::opset1::Subtract>({ convert, subtractConvert });
+
+    const auto subtractValues = std::make_shared<pattern::op::Or>(OutputVector{
+        ngraph::pattern::wrap_type<ngraph::opset1::Constant>(),
+        ngraph::pattern::wrap_type<ngraph::opset1::Convert>({ngraph::pattern::wrap_type<ngraph::opset1::Constant>()})
+    });
+    const auto subtract = ngraph::pattern::wrap_type<ngraph::opset1::Subtract>({ convert, subtractValues });

    const auto subtractOrConvert = std::make_shared<pattern::op::Or>(OutputVector{ convert, subtract });

@@ -83,14 +83,20 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt
            parent = subtract;
        }

-        const auto multiply = std::make_shared<DequantizationMultiply>(parent, splitedMul[i]);
+        const auto multiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(parent, splitedMul[i]);
+        NetworkHelper::setOutDataPrecisionForTypeRelaxed(multiply, dequantization.multiply->get_output_element_type(0));
        copy_runtime_info({ newSplit, multiply }, multiply);
        lastNodes.push_back(multiply);
        replacement.push_back(multiply);
    }

-    replace_node(split, replacement);
+    for (size_t i = 0ul; i < newSplit->get_output_size(); ++i) {
+        for (auto input : split->output(i).get_target_inputs()) {
+            input.replace_source_output(replacement[i]);
+        }
+    }
+
    updateOutputs(context, lastNodes, newSplit);
    return true;
}
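
Because Split produces several outputs, the transformed subgraph can no longer be swapped in with a single replace_node(split, replacement) call; instead every consumer of every original output is re-pointed to the replacement node with the same index. The re-wiring pattern in isolation (a hypothetical helper, not part of the patch):

#include <memory>
#include <vector>
#include <ngraph/node.hpp>

// Re-point every consumer of each output of oldNode to output 0 of the
// replacement node with the same index.
void replacePerOutput(const std::shared_ptr<ngraph::Node>& oldNode,
                      const std::vector<std::shared_ptr<ngraph::Node>>& replacement) {
    for (size_t i = 0; i < oldNode->get_output_size(); ++i) {
        for (auto input : oldNode->output(i).get_target_inputs()) {
            input.replace_source_output(replacement[i]->output(0));
        }
    }
}
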

@@ -101,9 +101,9 @@ bool SubtractMultiplyToMultiplyAddTransformation::transform(TransformationContex
    std::shared_ptr<Node> subtractConstant = fold<opset1::Multiply>(
        fold<opset1::Multiply>(
-            fold<opset1::Convert>(originalSubtractConstant, deqPrecision),
+            foldConvert(originalSubtractConstant, deqPrecision),
            std::make_shared<opset1::Constant>(deqPrecision, Shape{}, std::vector<float>{ -1.f })),
-        fold<opset1::Convert>(dequantization.multiply->get_input_node_shared_ptr(1), deqPrecision));
+        foldConvert(dequantization.multiply->get_input_node_shared_ptr(1), deqPrecision));

    if (is_type<opset1::Constant>(subtractConstant)) {
        std::shared_ptr<opset1::Constant> constant = as_type_ptr<opset1::Constant>(subtractConstant);

@@ -34,6 +34,7 @@
#include "low_precision/avg_pool.hpp"
#include "low_precision/clamp.hpp"
#include "low_precision/convolution.hpp"
+#include "low_precision/convolution_backprop_data.hpp"
#include "low_precision/depth_to_space.hpp"
#include "low_precision/fake_quantize.hpp"
#include "low_precision/group_convolution.hpp"

@@ -220,6 +221,7 @@ LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const
        add<AvgPoolTransformation, opset1::AvgPool>(params).
        add<ClampTransformation, opset1::Clamp>(params).
        add<ConvolutionTransformation, opset1::Convolution>(params).
+        add<ConvolutionBackpropDataTransformation, opset1::ConvolutionBackpropData>(params).
        add<DepthToSpaceTransformation, opset1::DepthToSpace>(params).
        add<FakeQuantizeTransformation, opset1::FakeQuantize>(params).
        add<GroupConvolutionTransformation, opset1::GroupConvolution>(params).

@@ -338,6 +340,7 @@ TypeRelaxedReplacer::TypeRelaxedReplacer() {
    make_matcher_type_relaxed<opset1::Clamp>(this);
    make_matcher_type_relaxed<opset1::Concat>(this);
    make_matcher_type_relaxed<opset1::Convolution>(this);
+    make_matcher_type_relaxed<opset1::ConvolutionBackpropData>(this);
    make_matcher_type_relaxed<opset1::DepthToSpace>(this);
    make_matcher_type_relaxed<opset1::FakeQuantize>(this);
    make_matcher_type_relaxed<opset1::GroupConvolution>(this);

@@ -430,23 +433,6 @@ void LowPrecisionTransformer::transform(std::shared_ptr<Function> network) {
    network->validate_nodes_and_infer_types();
}

-std::vector<element::Type> LowPrecisionTransformer::precisionIntersection(
-    const std::vector<element::Type>& v1,
-    const std::vector<element::Type>& v2) const noexcept {
-    std::vector<element::Type> v3;
-
-    auto v1Copy = v1;
-    auto v2Copy = v2;
-
-    std::sort(v1Copy.begin(), v1Copy.end());
-    std::sort(v2Copy.begin(), v2Copy.end());
-    std::set_intersection(v1Copy.begin(), v1Copy.end(),
-        v2Copy.begin(), v2Copy.end(),
-        std::back_inserter(v3));
-
-    return v3;
-}
-
std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(const Node& op) const noexcept {
    const std::string operantionType = LowPrecisionTransformations::getType(op);
    const std::vector<LayerTransformationPtr> transformation = transformations.find(operantionType);

@@ -456,7 +442,7 @@ std::vector<element::Type> LowPrecisionTransformer::getPrecisionsOnActivations(c
    std::vector<element::Type> precisions = transformation[0]->getPrecisionsOnActivations();

    for (const auto& transform : transformation) {
-        precisions = precisionIntersection(precisions, transform->getPrecisionsOnActivations());
+        precisions = NetworkHelper::precisionIntersection(precisions, transform->getPrecisionsOnActivations());
    }

    return precisions;
}

Some files were not shown because too many files have changed in this diff.