Merge remote-tracking branch 'upstream/master'

Steve Yoo 2021-08-20 13:32:53 +09:00
commit ef937a5a52
446 changed files with 10885 additions and 5977 deletions

View File

@ -4,17 +4,13 @@ jobs:
matrix: matrix:
Release: Release:
BUILD_TYPE: 'Release' BUILD_TYPE: 'Release'
PROTOBUF_LITE: 'OFF' PROTOBUF_LITE: 'ON'
TOX_COMMAND: 'tox && tox -e zoo_models' TOX_COMMAND: 'tox && tox -e zoo_models'
Debug: Debug:
BUILD_TYPE: 'Debug' BUILD_TYPE: 'Debug'
PROTOBUF_LITE: 'OFF'
TOX_COMMAND: 'tox'
Protobuf_lite:
BUILD_TYPE: 'Release'
PROTOBUF_LITE: 'ON' PROTOBUF_LITE: 'ON'
TOX_COMMAND: 'tox && tox -e zoo_models' TOX_COMMAND: 'tox'
maxParallel: 3 maxParallel: 2
# About 300% of total time # About 300% of total time
timeoutInMinutes: 90 timeoutInMinutes: 90
@ -56,10 +52,10 @@ jobs:
- script: | - script: |
rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR) rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR)
sudo rm -rf $(TMP_DIR) ; sudo mkdir $(TMP_DIR) ; sudo chmod 777 -R $(TMP_DIR)
sudo mkdir -p $(MODELS_DIR) sudo mkdir -p $(MODELS_DIR)
sudo apt --assume-yes install nfs-common sudo apt --assume-yes install nfs-common
sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(MODELS_DIR) -o vers=4,minorversion=1,sec=sys sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(MODELS_DIR) -o vers=4,minorversion=1,sec=sys
mkdir -p $(MODELS_DIR)/models_data
displayName: 'Make dirs' displayName: 'Make dirs'
- checkout: self - checkout: self
@ -76,15 +72,15 @@ jobs:
workingDirectory: $(WORK_DIR) workingDirectory: $(WORK_DIR)
displayName: 'Install dependencies' displayName: 'Install dependencies'
- script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(MODELS_DIR)/models_data -o -s "$(ONNX_MODEL_ZOO_SHA)"
displayName: 'Update models'
condition: ne(variables['BUILD_TYPE'], 'Debug')
- script: sudo docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg PROTOBUF_LITE=$(PROTOBUF_LITE) . - script: sudo docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg PROTOBUF_LITE=$(PROTOBUF_LITE) .
displayName: 'Docker build $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)' displayName: 'Docker build $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)'
- script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o -s "$(ONNX_MODEL_ZOO_SHA)"
displayName: 'Get models'
condition: ne(variables['BUILD_TYPE'], 'Debug')
- script: sudo fallocate -l 64G /swapfile ; sudo mkswap /swapfile ; sudo swapon /swapfile ; df ; free -h - script: sudo fallocate -l 64G /swapfile ; sudo mkswap /swapfile ; sudo swapon /swapfile ; df ; free -h
displayName: 'Create swap' displayName: 'Create swap'
- script: sudo docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)" - script: sudo docker run --name openvino-onnx-ci-container --volume $(MODELS_DIR)/models_data/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)"
displayName: 'Docker run $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)' displayName: 'Docker run $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)'

View File

@ -16,7 +16,7 @@ jobs:
timeoutInMinutes: 120 timeoutInMinutes: 120
pool: pool:
name: WIN_VMSS_VENV_F8S_WU2 name: WIN_VMSS_VENV_F16S_WU2
variables: variables:
system.debug: true system.debug: true
@ -34,8 +34,6 @@ jobs:
INSTALL_DIR: $(WORK_DIR)\install_pkg INSTALL_DIR: $(WORK_DIR)\install_pkg
INSTALL_TEST_DIR: $(INSTALL_DIR)\tests INSTALL_TEST_DIR: $(INSTALL_DIR)\tests
SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat
IB_DIR: C:\Program Files (x86)\IncrediBuild
IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe
steps: steps:
- script: | - script: |
@ -59,12 +57,6 @@ jobs:
rd /Q /S $(BUILD_SAMPLES_DIR) & mkdir $(BUILD_SAMPLES_DIR) rd /Q /S $(BUILD_SAMPLES_DIR) & mkdir $(BUILD_SAMPLES_DIR)
displayName: 'Make dir' displayName: 'Make dir'
- script: |
certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
call install_ib_console.bat
workingDirectory: $(WORK_DIR)
displayName: 'Install IncrediBuild'
- checkout: self - checkout: self
clean: true clean: true
lfs: false lfs: false
@ -109,9 +101,7 @@ jobs:
- script: dir $(REPO_DIR)\inference-engine\temp\ /s - script: dir $(REPO_DIR)\inference-engine\temp\ /s
displayName: 'List temp SDKs' displayName: 'List temp SDKs'
- script: | - script: call "$(MSVS_VARS_PATH)" && $(WORK_DIR)\ninja-win\ninja
set PATH=$(WORK_DIR)\ninja-win;%PATH%
call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja"
workingDirectory: $(BUILD_DIR) workingDirectory: $(BUILD_DIR)
displayName: 'Build Win' displayName: 'Build Win'
@ -153,10 +143,8 @@ jobs:
displayName: 'PaddlePaddle Frontend UT' displayName: 'PaddlePaddle Frontend UT'
continueOnError: false continueOnError: false
- script: | - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests.xml
set PATH=$(IB_DIR);%PATH% displayName: 'IE UT old'
call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests-IB.xml
displayName: 'IE UT old - IB'
continueOnError: false continueOnError: false
- script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\ieUnitTests --gtest_output=xml:TEST-ieUnitTests.xml
@ -187,11 +175,8 @@ jobs:
displayName: 'TEMPLATE FuncTests' displayName: 'TEMPLATE FuncTests'
continueOnError: false continueOnError: false
# call $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml - script: $(SETUPVARS) && $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke* --gtest_output=xml:TEST-cpuFuncTests.xml
- script: | displayName: 'CPU FuncTests'
set PATH=$(IB_DIR);%PATH%
call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\cpuFuncTests.exe --gtest_filter=*smoke*:-*CompareWithRefs/base_size=16_pre_nms_topn=100_post_nms_topn=100_nms_thresh=0.7_feat_stride=1_min_size=1_ratio*:*smoke_GRUSequenceCommonZeroClip/GRUSequenceTest.CompareWithRefs/mode=CONVERT_TO_TI_MAX_SEQ_LEN_CONST_seq_lengths* --gtest_output=xml:TEST-cpuFuncTests-IB.xml /testlevel=24
displayName: 'CPU FuncTests - IB'
continueOnError: false continueOnError: false
- script: | - script: |
@ -213,8 +198,3 @@ jobs:
buildPlatform: 'x64' # Optional buildPlatform: 'x64' # Optional
buildConfiguration: 'Windows' # Optional buildConfiguration: 'Windows' # Optional
#publishRunAttachments: true # Optional #publishRunAttachments: true # Optional
- script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent
displayName: Stop IncrediBuild
continueOnError: true
enabled: false

View File

@ -1,7 +1,7 @@
jobs: jobs:
- job: WinCC - job: WinCC
# About 150% of total time # About 150% of total time
timeoutInMinutes: 120 timeoutInMinutes: 60
pool: pool:
name: WIN_VMSS_VENV_F8S_WU2 name: WIN_VMSS_VENV_F8S_WU2
@ -10,26 +10,22 @@ jobs:
system.debug: true system.debug: true
VSTS_HTTP_RETRY: 5 VSTS_HTTP_RETRY: 5
VSTS_HTTP_TIMEOUT: 200 VSTS_HTTP_TIMEOUT: 200
WORKERS_NUMBER: 8
BUILD_TYPE: Release BUILD_TYPE: Release
REPO_DIR: $(Build.Repository.LocalPath) REPO_DIR: $(Build.Repository.LocalPath)
OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)\..\openvino_contrib OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)\..\openvino_contrib
MODELS_PATH: $(REPO_DIR)\..\testdata MODELS_PATH: $(REPO_DIR)\..\testdata
WORK_DIR: $(Pipeline.Workspace)\_w WORK_DIR: $(Pipeline.Workspace)\_w
BUILD_DIR: D:\build BUILD_DIR: D:\build
BIN_DIR: $(REPO_DIR)\bin\intel64
MSVS_VARS_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat MSVS_VARS_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat
MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe MSVC_COMPILER_PATH: C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.24.28314\bin\Hostx64\x64\cl.exe
INSTALL_DIR: $(WORK_DIR)\install_pkg INSTALL_DIR: $(WORK_DIR)\install_pkg
SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat SETUPVARS: $(INSTALL_DIR)\bin\setupvars.bat
IB_DIR: C:\Program Files (x86)\IncrediBuild
IB_TESTCONSOLE: $(IB_DIR)\IBTestConsole.exe
TEST_ENV_PATH: $(REPO_DIR)\inference-engine\temp\tbb\bin;$(REPO_DIR)\inference-engine\temp\opencv_4.5.2\opencv\bin;$(IB_DIR);%PATH%
steps: steps:
- script: | - script: |
powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom" powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom"
where python3 where python3
python3 --version
where python where python
python --version python --version
where java where java
@ -46,12 +42,6 @@ jobs:
rd /Q /S $(BUILD_DIR) & mkdir $(BUILD_DIR) rd /Q /S $(BUILD_DIR) & mkdir $(BUILD_DIR)
displayName: 'Make dir' displayName: 'Make dir'
- script: |
certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
call install_ib_console.bat
workingDirectory: $(WORK_DIR)
displayName: 'Install IncrediBuild'
- checkout: self - checkout: self
clean: true clean: true
lfs: false lfs: false
@ -59,7 +49,8 @@ jobs:
path: openvino path: openvino
- script: | - script: |
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-win.zip ninja-win.zip rem Speed up build
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip
powershell -command "Expand-Archive -Force ninja-win.zip" powershell -command "Expand-Archive -Force ninja-win.zip"
workingDirectory: $(WORK_DIR) workingDirectory: $(WORK_DIR)
displayName: 'Install dependencies' displayName: 'Install dependencies'
@ -70,20 +61,19 @@ jobs:
workingDirectory: $(BUILD_DIR) workingDirectory: $(BUILD_DIR)
displayName: 'CMake' displayName: 'CMake'
- script: | - script: dir $(REPO_DIR)\inference-engine\temp\ /s
set PATH=$(WORK_DIR)\ninja-win;%PATH% displayName: 'List temp SDKs'
call "$(MSVS_VARS_PATH)" && "C:\Program Files (x86)\IncrediBuild\BuildConsole.exe" /COMMAND="ninja"
- script: call "$(MSVS_VARS_PATH)" && $(WORK_DIR)\ninja-win\ninja
workingDirectory: $(BUILD_DIR) workingDirectory: $(BUILD_DIR)
displayName: 'Build Win' displayName: 'Build Win CC'
- script: dir $(REPO_DIR)\bin\ /s - script: dir $(REPO_DIR)\bin\ /s
displayName: 'List files' displayName: 'List bin files'
- script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake
workingDirectory: $(BUILD_DIR) workingDirectory: $(BUILD_DIR)
displayName: 'Install' displayName: 'Install'
- script: echo Stop IncrediBuild_Agent && net stop IncrediBuild_Agent - script: dir $(INSTALL_DIR) /s
displayName: Stop IncrediBuild displayName: 'List install files'
continueOnError: true
enabled: false

View File

@ -4,7 +4,7 @@ LABEL version=2021.03.30.1
# Build configuration arguments # Build configuration arguments
ARG BUILD_TYPE=Release ARG BUILD_TYPE=Release
ARG PROTOBUF_LITE=OFF ARG PROTOBUF_LITE=ON
ARG http_proxy ARG http_proxy
ARG https_proxy ARG https_proxy

View File

@ -75,6 +75,6 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins
*.md @openvinotoolkit/openvino-docs-maintainers *.md @openvinotoolkit/openvino-docs-maintainers
# Control 3d party dependencies # Control 3d party dependencies
*requirements* @openvino-configuration-mgmt **/*requirements*.* @openvino-configuration-mgmt
*setup.py @openvino-configuration-mgmt **/setup.py @openvino-configuration-mgmt
/scripts/install_dependencies/ @openvino-configuration-mgmt /scripts/install_dependencies/ @openvino-configuration-mgmt

View File

@ -18,9 +18,11 @@ FunctionTemplate: '^(operator.+|\w+)$'
TypeAliasName: '^\w+$' TypeAliasName: '^\w+$'
VariableReference: '^\w+$' VariableReference: '^\w+$'
EnumName: '^[A-Z][\w]+$'
# excepts element_type
# TODO: Fix interpolate
EnumConstantName: '^([A-Z\d_]+|undefined|dynamic|boolean|bf16|f16|f32|f64|i4|i8|i16|i32|i64|u1|u4|u8|u16|u32|u64|asymmetric|align_corners|round_prefer_floor|round_prefer_ceil|floor|ceil|simple|nearest|linear|linear_onnx|cubic|area|scales|sizes|half_pixel|tf_half_pixel_for_nn|pytorch_half_pixel|asymetric)$'
# TODO: align # TODO: align
EnumConstantName: '^.*$'
EnumName: '^.*$'
UsingDeclaration: '^.*$' UsingDeclaration: '^.*$'
TypedefName: '^.*$' TypedefName: '^.*$'

View File

@ -0,0 +1,34 @@
# Paddle Support in the OpenVINO™ {#openvino_docs_IE_DG_Paddle_Support}
Starting from the 2022.1 release, OpenVINO™ supports reading native Paddle models.
The `Core::ReadNetwork()` method provides a uniform way to read models from either the IR or the Paddle format and is the recommended approach to reading models.
## Read Paddle Models from IR
After [Converting a Paddle Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md) to the [Intermediate Representation (IR)](../MO_DG/IR_and_opsets.md), the model can be read with the recommended approach. Example:
```cpp
InferenceEngine::Core core;
auto network = core.ReadNetwork("model.xml");
```
## Read Paddle Models from Paddle Format (Paddle `inference model` model type)
**Example:**
```cpp
InferenceEngine::Core core;
auto network = core.ReadNetwork("model.pdmodel");
```
**Reshape feature:**
OpenVINO™ does not provide a mechanism to specify pre-processing, such as mean value subtraction or reverse input channels, for the Paddle format.
If a Paddle model contains dynamic input shapes, use the `CNNNetwork::reshape` method for shape specialization, as shown in the sketch below.
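The following is a minimal sketch of that reshape call, assuming a Paddle model with a single input named `x`; the input name and the `1x3x224x224` target shape are placeholders, not taken from a real model:
```cpp
#include <ie_core.hpp>

int main() {
    InferenceEngine::Core core;
    InferenceEngine::CNNNetwork network = core.ReadNetwork("model.pdmodel");
    // Collect the current input shapes and overwrite the dynamic one
    // with a fully defined shape before loading the network.
    InferenceEngine::ICNNNetwork::InputShapes shapes = network.getInputShapes();
    shapes["x"] = {1, 3, 224, 224};  // placeholder input name and shape
    network.reshape(shapes);
    return 0;
}
```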
## NOTE
* A Paddle [`inference model`](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/inference_en.md) mainly consists of two kinds of files, `model.pdmodel` (model file) and `model.pdiparams` (params file), which are used for inference.
* The list of supported Paddle models and instructions on how to export them are described in [Convert a Paddle Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md).
* For `Normalize` Paddle models, the input data should be in FP32 format.
* When reading Paddle models from the Paddle format, make sure that `model.pdmodel` and `model.pdiparams` are in the same directory.

View File

@ -0,0 +1,128 @@
# Auto-Device Plugin {#openvino_docs_IE_DG_supported_plugins_AUTO}
## Auto-Device Plugin Execution
Auto-device is a new, special "virtual" or "proxy" device in the OpenVINO™ toolkit.
Use "AUTO" as the device name to delegate selection of an actual accelerator to OpenVINO.
With the 2021.4 release, Auto-device internally recognizes and selects devices from CPU,
integrated GPU and discrete Intel GPUs (when available) depending on the device capabilities and the characteristics of the CNN model,
for example, precision. Auto-device then assigns inference requests to the selected device.
From the application's point of view, this is just another device that handles all accelerators in the system.
With the 2021.4 release, Auto-device setup is done in three major steps:
* Step 1: Configure each device as usual (for example, via the conventional <code>SetConfig</code> method)
* Step 2: Load a network to the Auto-device plugin. This is the only change needed in your application
* Step 3: Just like with any other executable network (resulting from <code>LoadNetwork</code>), create as many requests as needed to saturate the devices.
These steps are covered below in detail.
## Defining and Configuring the Auto-Device Plugin
Following the OpenVINO notion of “devices”, the Auto-device is named “AUTO”. The only configuration option for the Auto-device is a limited device list:
| Parameter name | Parameter values | Default | Description |
| :--- | :--- | :--- |:-----------------------------------------------------------------------------|
| "AUTO_DEVICE_LIST" | comma-separated device names <span style="color:red">with no spaces</span>| N/A | Device candidate list to be selected |
You can use the configuration name directly as a string or use <code>IE::KEY_AUTO_DEVICE_LIST</code> from <code>ie_plugin_config.hpp</code>,
which defines the same string.
There are two ways to use Auto-device:
1. Directly indicate the device with “AUTO” or an empty string:
@snippet snippets/AUTO0.cpp part0
2. Use the Auto-device configuration to limit the list of device candidates to be selected:
@snippet snippets/AUTO1.cpp part1
Auto-device supports querying device optimization capabilities as a metric:
| Parameter name | Parameter values |
| :--- | :--- |
| "OPTIMIZATION_CAPABILITIES" | Auto-Device capabilities |
## Enumerating Available Devices and Auto-Device Selecting Logic
### Enumerating Available Devices
Inference Engine now features a dedicated API to enumerate devices and their capabilities.
See [Hello Query Device C++ Sample](../../../inference-engine/samples/hello_query_device/README.md).
This is the example output from the sample (truncated to the devices' names only):
```sh
./hello_query_device
Available devices:
Device: CPU
...
Device: GPU.0
...
Device: GPU.1
```
### Default Auto-Device selecting logic
With the 2021.4 release, Auto-Device selects the most suitable device with the following default logic:
1. Check if dGPU, iGPU and CPU devices are available
2. Get the precision of the input model, such as FP32
3. According to the priority of dGPU, iGPU and CPU (in this order), if the device supports the precision of the input network, select it as the most suitable device
For example, CPU, dGPU and iGPU can support the following precisions and optimization capabilities:
| Device | OPTIMIZATION_CAPABILITIES |
| :--- | :--- |
| CPU | WINOGRAD FP32 FP16 INT8 BIN |
| dGPU | FP32 BIN BATCHED_BLOB FP16 INT8 |
| iGPU | FP32 BIN BATCHED_BLOB FP16 INT8 |
When an application uses Auto-device to run an FP16 IR on a system with CPU, dGPU and iGPU, Auto-device offloads the workload to the dGPU.
When an application uses Auto-device to run an FP16 IR on a system with CPU and iGPU, Auto-device offloads the workload to the iGPU.
When an application uses Auto-device to run a WINOGRAD-enabled IR on a system with CPU, dGPU and iGPU, Auto-device offloads the workload to the CPU.
In any case, when loading the network to the dGPU or iGPU fails, the network falls back to the CPU as the last choice.
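The logic above can be summarized with the following illustrative sketch. It is plain pseudocode in C++, not the actual plugin implementation; the `SelectDevice` name, the device labels, and the availability/precision-support checks are all assumptions supplied by the caller:
```cpp
#include <algorithm>
#include <functional>
#include <string>
#include <vector>

// Illustrative only: restates the documented 2021.4 default selection logic.
std::string SelectDevice(const std::vector<std::string>& available,   // e.g. {"CPU", "iGPU", "dGPU"}
                         const std::string& model_precision,          // e.g. "FP16"
                         const std::function<bool(const std::string&, const std::string&)>& supports) {
    // Priority order: discrete GPU, then integrated GPU, then CPU.
    const std::vector<std::string> priority = {"dGPU", "iGPU", "CPU"};
    for (const std::string& device : priority) {
        const bool present = std::find(available.begin(), available.end(), device) != available.end();
        if (present && supports(device, model_precision)) {
            return device;  // first device (by priority) that supports the model precision
        }
    }
    return "CPU";  // fallback: if loading to dGPU/iGPU fails, the network goes to CPU
}
```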
### Limit Auto Target Devices Logic
According to the Auto-device selection logic from the previous section,
the most suitable device is selected from the available devices to load the model, as follows:
@snippet snippets/AUTO2.cpp part2
Another way to load the model to a device from a limited choice of devices is with the Auto-device:
@snippet snippets/AUTO3.cpp part3
## Configuring the Individual Devices and Creating the Auto-Device on Top
As described in the first section, configure each individual device as usual and then just create the "AUTO" device on top:
@snippet snippets/AUTO4.cpp part4
Alternatively, you can combine all the individual device settings into a single config and load it,
allowing the Auto-device plugin to parse and apply the settings to the right devices. See the code example here:
@snippet snippets/AUTO5.cpp part5
## Using the Auto-Device with OpenVINO Samples and Benchmark App
Note that every OpenVINO sample that supports the "-d" (which stands for "device") command-line option transparently accepts the Auto-device.
The Benchmark Application is the best example of the optimal usage of the Auto-device.
You do not need to set the number of requests and CPU threads, as the application provides optimal out-of-the-box performance.
Below is an example command line to evaluate AUTO performance with the Benchmark App:
```sh
./benchmark_app -d AUTO -m <model> -i <input> -niter 1000
```
You can also use the Auto-device with a limited device choice:
```sh
./benchmark_app -d AUTO:CPU,GPU -m <model> -i <input> -niter 1000
```
Note that the default number of CPU streams is 1 when using “-d AUTO”.
Note that you can use the FP16 IR to work with the Auto-device.
Also note that no demos are (yet) fully optimized for the Auto-device in terms of selecting the most suitable device,
using GPU streams/throttling, and so on.

View File

@ -66,10 +66,8 @@ In addition to common parameters, the MYRIAD plugin accepts the following option
| Parameter Name | Parameter Values | Default | Description | | Parameter Name | Parameter Values | Default | Description |
| :--- | :--- | :--- | :--- | | :--- | :--- | :--- | :--- |
| `KEY_VPU_MYRIAD_PLATFORM` | empty string/`VPU_MYRIAD_2450`/`VPU_MYRIAD_2480` | empty string | If set, the plugin will use a device with specific platform to allocate a network. |
| `KEY_VPU_MYRIAD_PROTOCOL` | empty string/`VPU_MYRIAD_USB`/`VPU_MYRIAD_PCIE` | empty string | If set, the plugin will use a device with specific protocol to allocate a network. | | `KEY_VPU_MYRIAD_PROTOCOL` | empty string/`VPU_MYRIAD_USB`/`VPU_MYRIAD_PCIE` | empty string | If set, the plugin will use a device with specific protocol to allocate a network. |
| `KEY_VPU_MYRIAD_FORCE_RESET` | `YES`/`NO` | `NO` | Enables force reset of all booted devices when new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. | | `KEY_VPU_MYRIAD_FORCE_RESET` | `YES`/`NO` | `NO` | Enables force reset of all booted devices when new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. |
| `KEY_VPU_PLATFORM` | empty string/`VPU_2450`/`VPU_2480` | empty string | **Deprecated** Use `KEY_VPU_MYRIAD_PLATFORM` instead. <br />If set, the plugin will use a device with specific platform to allocate a network. |
| `KEY_VPU_FORCE_RESET` | `YES`/`NO` | `NO` | **Deprecated** Use `KEY_VPU_MYRIAD_FORCE_RESET` instead. <br />Enables force reset of all booted devices when new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. | | `KEY_VPU_FORCE_RESET` | `YES`/`NO` | `NO` | **Deprecated** Use `KEY_VPU_MYRIAD_FORCE_RESET` instead. <br />Enables force reset of all booted devices when new ExecutableNetwork is created.<br />This is a plugin scope option and must be used with the plugin's SetConfig method only.<br />See <a href="#MYRIAD_DEVICE_ALLOC">Device allocation</a> section for details. |
## Device allocation <a name="MYRIAD_DEVICE_ALLOC">&nbsp;</a> ## Device allocation <a name="MYRIAD_DEVICE_ALLOC">&nbsp;</a>

View File

@ -14,6 +14,7 @@ The Inference Engine provides unique capabilities to infer deep learning models
|[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs | |[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs |
|[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel&reg; Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel&reg; Pentium&reg; Silver J5005 Processor, Intel&reg; Pentium&reg; Silver N5000 Processor, Intel&reg; Celeron&reg; J4005 Processor, Intel&reg; Celeron&reg; J4105 Processor, Intel&reg; Celeron&reg; Processor N4100, Intel&reg; Celeron&reg; Processor N4000, Intel&reg; Core&trade; i3-8121U Processor, Intel&reg; Core&trade; i7-1065G7 Processor, Intel&reg; Core&trade; i7-1060G7 Processor, Intel&reg; Core&trade; i5-1035G4 Processor, Intel&reg; Core&trade; i5-1035G7 Processor, Intel&reg; Core&trade; i5-1035G1 Processor, Intel&reg; Core&trade; i5-1030G7 Processor, Intel&reg; Core&trade; i5-1030G4 Processor, Intel&reg; Core&trade; i3-1005G1 Processor, Intel&reg; Core&trade; i3-1000G1 Processor, Intel&reg; Core&trade; i3-1000G4 Processor| |[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel&reg; Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel&reg; Pentium&reg; Silver J5005 Processor, Intel&reg; Pentium&reg; Silver N5000 Processor, Intel&reg; Celeron&reg; J4005 Processor, Intel&reg; Celeron&reg; J4105 Processor, Intel&reg; Celeron&reg; Processor N4100, Intel&reg; Celeron&reg; Processor N4000, Intel&reg; Core&trade; i3-8121U Processor, Intel&reg; Core&trade; i7-1065G7 Processor, Intel&reg; Core&trade; i7-1060G7 Processor, Intel&reg; Core&trade; i5-1035G4 Processor, Intel&reg; Core&trade; i5-1035G7 Processor, Intel&reg; Core&trade; i5-1035G1 Processor, Intel&reg; Core&trade; i5-1030G7 Processor, Intel&reg; Core&trade; i5-1030G4 Processor, Intel&reg; Core&trade; i3-1005G1 Processor, Intel&reg; Core&trade; i3-1000G1 Processor, Intel&reg; Core&trade; i3-1000G4 Processor|
|[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel&reg; devices in parallel | |[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel&reg; devices in parallel |
|[Auto-Device plugin](AUTO.md) |Auto-Device plugin enables selecting Intel&reg; device for inference automatically |
|[Heterogeneous plugin](HETERO.md) |Heterogeneous plugin enables automatic inference splitting between several Intel&reg; devices (for example if a device doesn't [support certain layers](#supported-layers)). | |[Heterogeneous plugin](HETERO.md) |Heterogeneous plugin enables automatic inference splitting between several Intel&reg; devices (for example if a device doesn't [support certain layers](#supported-layers)). |
Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/). Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/).

View File

@ -0,0 +1,62 @@
# Converting a Paddle* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Paddle}
A summary of the steps for optimizing and deploying a model that was trained with Paddle\*:
1. [Configure the Model Optimizer](../Config_Model_Optimizer.md) for Paddle\*.
2. [Convert a Paddle\* Model](#Convert_From_Paddle) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values.
3. Test the model in the Intermediate Representation format using the [Inference Engine](../../../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) in the target environment via provided Inference Engine [sample applications](../../../IE_DG/Samples_Overview.md).
4. [Integrate](../../../IE_DG/Samples_Overview.md) the [Inference Engine](../../../IE_DG/Deep_Learning_Inference_Engine_DevGuide.md) in your application to deploy the model in the target environment.
## Supported Topologies
| Model Name| Model Type| Description|
| ------------- | ------------ | ------------- |
|ppocr-det| optical character recognition| Models are exported from [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/). Refer to [README.md](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/#pp-ocr-20-series-model-listupdate-on-dec-15).|
|ppocr-rec| optical character recognition| Models are exported from [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/). Refer to [README.md](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.1/#pp-ocr-20-series-model-listupdate-on-dec-15).|
|ResNet-50| classification| Models are exported from [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.1/). Refer to [getting_started_en.md](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/en/tutorials/getting_started_en.md#4-use-the-inference-model-to-predict)|
|MobileNet v2| classification| Models are exported from [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.1/). Refer to [getting_started_en.md](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/en/tutorials/getting_started_en.md#4-use-the-inference-model-to-predict)|
|MobileNet v3| classification| Models are exported from [PaddleClas](https://github.com/PaddlePaddle/PaddleClas/tree/release/2.1/). Refer to [getting_started_en.md](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.1/docs/en/tutorials/getting_started_en.md#4-use-the-inference-model-to-predict)|
|BiSeNet v2| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|DeepLab v3 plus| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|Faster-SCNN| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|OCRNET| semantic segmentation| Models are exported from [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1). Refer to [model_export.md](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.1/docs/model_export.md#)|
|Yolo v3| detection| Models are exported from [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.1). Refer to [EXPORT_MODEL.md](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md#).|
|ppyolo| detection| Models are exported from [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.1). Refer to [EXPORT_MODEL.md](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md#).|
> **NOTE:** The verified models are exported from the repository of branch release/2.1.
## Convert a Paddle* Model <a name="Convert_From_Paddle"></a>
To convert a Paddle\* model:
1. Go to the `$INTEL_OPENVINO_DIR/deployment_tools/model_optimizer` directory.
2. Use the `mo.py` script to convert a model, specifying the framework, the path to the input model `.pdmodel` file, and the path to an output directory with write permissions:
```sh
python3 mo.py --input_model <INPUT_MODEL>.pdmodel --output_dir <OUTPUT_MODEL_DIR> --framework=paddle
```
Parameters to convert your model:
* [Framework-agnostic parameters](Converting_Model_General.md): These parameters are used to convert a model trained with any supported framework.
> **NOTE:** `--scale`, `--scale_values`, `--mean_values`, `--mean_file` are unsupported in the current version of mo_paddle.
### Example of Converting a Paddle* Model
Below is an example command to convert a Yolo v3 Paddle\* network to an OpenVINO IR network with the Model Optimizer.
```sh
python3 mo.py --model_name yolov3_darknet53_270e_coco --output_dir <OUTPUT_MODEL_DIR> --framework=paddle --data_type=FP32 --reverse_input_channels --input_shape=[2,3,608,608],[1,2],[1,2] --input=image,im_shape,scale_factor --output=save_infer_model/scale_0.tmp_1,save_infer_model/scale_1.tmp_1 --input_model=yolov3.pdmodel
```
## Supported Paddle\* Layers
Refer to [Supported Framework Layers](../Supported_Frameworks_Layers.md) for the list of supported standard layers.
## Frequently Asked Questions (FAQ)
The Model Optimizer provides explanatory messages if it is unable to run to completion due to issues like typographical errors, incorrectly used options, or other issues. The message describes the potential cause of the problem and gives a link to the [Model Optimizer FAQ](../Model_Optimizer_FAQ.md). The FAQ has instructions on how to resolve most issues. The FAQ also includes links to relevant sections in the Model Optimizer Developer Guide to help you understand what went wrong.
## Summary
In this document, you learned:
* Basic information about how the Model Optimizer works with Paddle\* models
* Which Paddle\* models are supported
* How to convert a trained Paddle\* model using the Model Optimizer with framework-agnostic command-line options

View File

@ -16,7 +16,7 @@ The <code>mo.py</code> script is the universal entry point that can deduce the f
* `.onnx` - ONNX\* models * `.onnx` - ONNX\* models
* `.nnet` - Kaldi\* models. * `.nnet` - Kaldi\* models.
If the model files do not have standard extensions, you can use the ``--framework {tf,caffe,kaldi,onnx,mxnet}`` option to specify the framework type explicitly. If the model files do not have standard extensions, you can use the ``--framework {tf,caffe,kaldi,onnx,mxnet,paddle}`` option to specify the framework type explicitly.
For example, the following commands are equivalent: For example, the following commands are equivalent:
```sh ```sh
@ -33,6 +33,7 @@ Framework-specific parameters for:
* [MXNet](Convert_Model_From_MxNet.md), * [MXNet](Convert_Model_From_MxNet.md),
* [ONNX](Convert_Model_From_ONNX.md), * [ONNX](Convert_Model_From_ONNX.md),
* [Kaldi](Convert_Model_From_Kaldi.md). * [Kaldi](Convert_Model_From_Kaldi.md).
* [Paddle](Convert_Model_From_Paddle.md).
## See Also ## See Also

View File

@ -326,6 +326,7 @@ limitations under the License.
</tab> </tab>
<tab type="user" title="Heterogeneous Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_HETERO"/> <tab type="user" title="Heterogeneous Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_HETERO"/>
<tab type="user" title="Multi-Device Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_MULTI"/> <tab type="user" title="Multi-Device Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_MULTI"/>
<tab type="user" title="Auto-Device Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_AUTO"/>
<tab type="user" title="GNA Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_GNA"/> <tab type="user" title="GNA Plugin" url="@ref openvino_docs_IE_DG_supported_plugins_GNA"/>
</tab> </tab>
<tab type="user" title="Known Issues" url="@ref openvino_docs_IE_DG_Known_Issues_Limitations"/> <tab type="user" title="Known Issues" url="@ref openvino_docs_IE_DG_Known_Issues_Limitations"/>

View File

@ -4,7 +4,16 @@
**Category**: Comparison binary operation **Category**: Comparison binary operation
**Short description**: *Greater* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules. **Short description**: *Greater* performs element-wise comparison operation with two given tensors applying broadcast rules specified in the `auto_broadcast` attribute.
**Detailed description**
Before performing the comparison operation, input tensors *a* and *b* are broadcasted if their shapes are different and the `auto_broadcast` attribute is not `none`. Broadcasting is performed according to the `auto_broadcast` value.
After broadcasting, *Greater* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} > b_{i}
\f]
**Attributes**: **Attributes**:
@ -13,39 +22,33 @@
* **Description**: specifies rules used for auto-broadcasting of input tensors. * **Description**: specifies rules used for auto-broadcasting of input tensors.
* **Range of values**: * **Range of values**:
* *none* - no auto-broadcasting is allowed, all input shapes should match * *none* - no auto-broadcasting is allowed, all input shapes should match
* *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>. * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md),
* *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md).
* **Type**: string * **Type**: string
* **Default value**: "numpy" * **Default value**: "numpy"
* **Required**: *no* * **Required**: *no*
**Inputs** **Inputs**
* **1**: A tensor of type *T*. **Required.** * **1**: A tensor of type *T* and arbitrary shape. **Required.**
* **2**: A tensor of type *T*. **Required.** * **2**: A tensor of type *T* and arbitrary shape. **Required.**
**Outputs** **Outputs**
* **1**: The result of element-wise comparison operation. A tensor of type boolean. * **1**: The result of element-wise comparison operation applied to the input tensors. A tensor of type *T_BOOL* and shape equal to broadcasted shape of two inputs.
**Types** **Types**
* *T*: arbitrary supported type. * *T*: arbitrary supported type.
* *T_BOOL*: `boolean`.
**Detailed description**
Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
After broadcasting *Greater* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} > b_{i}
\f]
**Examples** **Examples**
*Example 1* *Example 1: no broadcast*
```xml ```xml
<layer ... type="Greater"> <layer ... type="Greater">
<data auto_broadcast="none"/>
<input> <input>
<port id="0"> <port id="0">
<dim>256</dim> <dim>256</dim>
@ -65,9 +68,10 @@ o_{i} = a_{i} > b_{i}
</layer> </layer>
``` ```
*Example 2: broadcast* *Example 2: numpy broadcast*
```xml ```xml
<layer ... type="Greater"> <layer ... type="Greater">
<data auto_broadcast="numpy"/>
<input> <input>
<port id="0"> <port id="0">
<dim>8</dim> <dim>8</dim>

View File

@ -4,7 +4,18 @@
**Category**: Comparison binary operation **Category**: Comparison binary operation
**Short description**: *NotEqual* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules. **Short description**: *NotEqual* performs element-wise comparison operation with two given tensors applying
multi-directional broadcast rules specified in the `auto_broadcast` attribute.
**Detailed description**
Before performing the comparison operation, input tensors *a* and *b* are broadcasted if their shapes are different.
Broadcasting is performed according to the `auto_broadcast` value.
After broadcasting, *NotEqual* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} != b_{i}
\f]
**Attributes**: **Attributes**:
@ -13,7 +24,8 @@
* **Description**: specifies rules used for auto-broadcasting of input tensors. * **Description**: specifies rules used for auto-broadcasting of input tensors.
* **Range of values**: * **Range of values**:
* *none* - no auto-broadcasting is allowed, all input shapes should match * *none* - no auto-broadcasting is allowed, all input shapes should match
* *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>. * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md),
* *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md).
* **Type**: string * **Type**: string
* **Default value**: "numpy" * **Default value**: "numpy"
* **Required**: *no* * **Required**: *no*
@ -31,15 +43,6 @@
* *T*: arbitrary supported type. * *T*: arbitrary supported type.
**Detailed description**
Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
After broadcasting *NotEqual* does the following with the input tensors *a* and *b*:
\f[
o_{i} = a_{i} \neq b_{i}
\f]
**Examples** **Examples**
*Example 1* *Example 1*

View File

@ -6,33 +6,7 @@
**Short description**: *LogicalXor* performs element-wise logical XOR operation with two given tensors applying multi-directional broadcast rules. **Short description**: *LogicalXor* performs element-wise logical XOR operation with two given tensors applying multi-directional broadcast rules.
**Attributes**: **Detailed description**: Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
* *auto_broadcast*
* **Description**: specifies rules used for auto-broadcasting of input tensors.
* **Range of values**:
* *none* - no auto-broadcasting is allowed, all input shapes should match
* *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in <a href="https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md">ONNX docs</a>.
* **Type**: string
* **Default value**: "numpy"
* **Required**: *no*
**Inputs**
* **1**: A tensor of type *T*. **Required.**
* **2**: A tensor of type *T*. **Required.**
**Outputs**
* **1**: The result of element-wise logical XOR operation. A tensor of type *T*.
**Types**
* *T*: boolean type.
**Detailed description**
Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value.
After broadcasting *LogicalXor* does the following with the input tensors *a* and *b*: After broadcasting *LogicalXor* does the following with the input tensors *a* and *b*:
@ -40,9 +14,35 @@ After broadcasting *LogicalXor* does the following with the input tensors *a* an
o_{i} = a_{i} \oplus b_{i} o_{i} = a_{i} \oplus b_{i}
\f] \f]
**Attributes**:
* *auto_broadcast*
* **Description**: specifies rules used for auto-broadcasting of input tensors.
* **Range of values**:
* *none* - no auto-broadcasting is allowed, all input shapes must match
* *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md),
* *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md).
* **Type**: string
* **Default value**: "numpy"
* **Required**: *no*
**Inputs**
* **1**: A tensor of type *T_BOOL* and arbitrary shape. **Required.**
* **2**: A tensor of type *T_BOOL* and arbitrary shape. **Required.**
**Outputs**
* **1**: The result of the element-wise *LogicalXor* operation. A tensor of type *T_BOOL* with shape equal to the broadcasted shape of the two inputs.
**Types**
* *T_BOOL*: `boolean`.
**Examples** **Examples**
*Example 1* *Example 1: no broadcast*
```xml ```xml
<layer ... type="LogicalXor"> <layer ... type="LogicalXor">
@ -65,7 +65,7 @@ o_{i} = a_{i} \oplus b_{i}
</layer> </layer>
``` ```
*Example 2: broadcast* *Example 2: numpy broadcast*
```xml ```xml
<layer ... type="LogicalXor"> <layer ... type="LogicalXor">
<input> <input>

View File

@ -163,7 +163,7 @@ strides = [1, 1]
kernel = [2, 2] kernel = [2, 2]
rounding_type = "floor" rounding_type = "floor"
auto_pad = "same_upper" auto_pad = "same_upper"
output = [[[[5, 5, -6], output = [[[[5, 5, 3],
[8, 9, 9] [8, 9, 9]
[8, 9, 9]], [8, 9, 9]],
[[6, 5, 5], [[6, 5, 5],

View File

@ -0,0 +1,360 @@
## MaxPool <a name="MaxPool"></a> {#openvino_docs_ops_pooling_MaxPool_8}
**Versioned name**: *MaxPool-8*
**Category**: *Pooling*
**Short description**: Performs the max pooling operation on input.
**Detailed description**: Input shape can be either 3D, 4D, or 5D. The max pooling operation is performed with respect to input shape from the third dimension to the last dimension. If paddings are used, during the pooling calculation their values are `-inf`. The max pooling operation involves sliding a filter over each channel of a feature map and downsampling by choosing the largest value within the region covered by the filter.
**Attributes**: *Pooling* attributes are specified in the `data` node, which is a child of the layer node.
* *strides*
* **Description**: *strides* is a distance (in pixels) to slide the window on the feature map over the (z, y, x) axes for 3D poolings and (y, x) axes for 2D poolings. For example, *strides* equal to "4,2,1" means sliding the window 4 pixels at a time over depth dimension, 2 over height dimension, and 1 over width dimension.
* **Range of values**: integer values starting from 0
* **Type**: int[]
* **Required**: *yes*
* *dilations*
* **Description**: *dilations* specify the index of the next pixel to select when pooling. If not present, the dilation defaults to 1, meaning the adjacent pixel is chosen. A value of 2 indicates that one pixel is skipped and every other pixel is considered. Dilations specify one value for each spatial axis of the kernel: `(z, y, x)` for 3D poolings and `(y, x)` for 2D poolings.
* **Range of values**: integer values starting from 0
* **Type**: int[]
* **Default value**: `[1,1,...]`
* **Required**: *no*
* *pads_begin*
* **Description**: *pads_begin* is a number of pixels to add to the beginning along each axis. For example, *pads_begin* equal to "1,2" means adding 1 pixel to the top of the input and 2 to the left of the input. All added padding values are equal to negative infinity.
* **Range of values**: integer values starting from 0
* **Type**: int[]
* **Required**: *yes*
* **Note**: the attribute is ignored when *auto_pad* attribute is specified.
* *pads_end*
* **Description**: *pads_end* is a number of pixels to add to the ending along each axis. For example, *pads_end* equal to "1,2" means adding 1 pixel to the bottom of the input and 2 to the right of the input. All added padding values are equal to negative infinity.
* **Range of values**: integer values starting from 0
* **Type**: int[]
* **Required**: *yes*
* **Note**: the attribute is ignored when the *auto_pad* attribute is specified.
* *kernel*
* **Description**: *kernel* is a size of each filter. For example, *kernel* equal to (2, 3) means that each filter has height equal to 2 and width equal to 3.
* **Range of values**: integer values starting from 1
* **Type**: int[]
* **Required**: *yes*
* *rounding_type*
* **Description**: *rounding_type* is a type of rounding to be used to compute output shape.
* **Range of values**:
* *ceil*
* *floor*
* **Type**: string
* **Default value**: *floor*
* **Required**: *no*
* *auto_pad*
* **Description**: *auto_pad* specifies how the padding is calculated. Possible values:
* *explicit*: explicit padding values from `pads_begin` and `pads_end` are used.
* *same_upper (same_lower)* the input is padded to match the output size. In case of odd padding value, an extra padding is added at the end (at the beginning).
* *valid* padding is not used.
* **Type**: string
* **Default value**: *explicit*
* **Required**: *no*
* **Note**: *pads_begin* and *pads_end* attributes are ignored when *auto_pad* is not equal to explicit.
* *index_element_type*
* **Description**: the type of output tensor with indices
* **Range of values**: "i64" or "i32"
* **Type**: string
* **Default value**: "i64"
* **Required**: *No*
* *axis*
* **Description**: indicator of the first dimension in the input shape that should be used to calculate the upper bound of allowed index output values. The upper bound is the product of dimensions starting from the one pointed by the 'axis' attribute until the end of the input shape.
* **Range of values**: integer number. Negative value means counting dimension from the end. The range is `[-R, R - 1]`, where `R` is the rank of the input tensor.
* **Type**: int
* **Default value**: 0
* **Required**: *No*
**Inputs**:
* **1**: 3D, 4D, or 5D input tensor of type *T*. **Required.**
**Outputs**:
* **1**: Input shape can be either `[N, C, H]`, `[N, C, H, W]`, or `[N, C, H, W, D]`. The corresponding output shape is `[N, C, H_out]`, `[N, C, H_out, W_out]` or `[N, C, H_out, W_out, D_out]`. Output tensor has the same data type as the input tensor.
* **2**: Output tensor of type *T_IND* with indices of values selected by the pooling operation.
Shape of this output matches the first output. The type of this output can be specified using the `index_element_type` attribute.
Values are computed as indices in a tensor flattened to 1D, not considering padding. Examples for a 5D input tensor:
* When `axis == 0`, the values are in the range `[0, N * C * H * W * D)`.
* When `axis == 2`, the values are in the range `[0, H * W * D)`.
Note: the values of this output can only be calculated correctly if `pads_value` is set to `-infinity`.
**Types**
* *T*: floating point or integer type.
* *T_IND*: `int64` or `int32`.
**Mathematical Formulation**
Output shape calculation based on `auto_pad` and `rounding_type`:
* `auto_pad = explicit` and `rounding_type = floor`
`H_out = floor((H + pads_begin[0] + pads_end[0] - ((kernel[0] - 1) * dilations[0] + 1)) / strides[0] + 1)`
`W_out = floor((W + pads_begin[1] + pads_end[1] - ((kernel[1] - 1) * dilations[1] + 1)) / strides[1] + 1)`
`D_out = floor((D + pads_begin[2] + pads_end[2] - ((kernel[2] - 1) * dilations[2] + 1)) / strides[2] + 1)`
* `auto_pad = explicit` and `rounding_type = ceil`
`H_out = ceil((H + pads_begin[0] + pads_end[0] - ((kernel[0] - 1) * dilations[0] + 1)) / strides[0] + 1)`
`W_out = ceil((W + pads_begin[1] + pads_end[1] - ((kernel[1] - 1) * dilations[1] + 1)) / strides[1] + 1)`
`D_out = ceil((D + pads_begin[2] + pads_end[2] - ((kernel[2] - 1) * dilations[2] + 1)) / strides[2] + 1)`
* `auto_pad = valid`
`H_out = ceil((H - ((kernel[0] - 1) * dilations[0] + 1) + 1) / strides[0])`
`W_out = ceil((W - ((kernel[1] - 1) * dilations[1] + 1) + 1) / strides[1])`
`D_out = ceil((D - ((kernel[2] - 1) * dilations[2] + 1) + 1) / strides[2])`
* `auto_pad = same_upper / same_lower`
`H_out = H`
`W_out = W`
`D_out = D`
If `H + pads_begin[i] + pads_end[i] - kernel[i]` is not divisible by `strides[i]` evenly, the result is rounded with respect to the `rounding_type` attribute.
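For reference, the explicit-padding formulas above can be expressed as a small helper. This is an illustrative sketch, not part of OpenVINO; the function name and parameters are chosen for this example only:
```cpp
#include <cmath>
#include <cstdint>

// Computes one spatial output dimension of MaxPool-8 for auto_pad = "explicit",
// following the formulas above; ceil_mode selects rounding_type = "ceil" vs "floor".
int64_t max_pool_out_dim(int64_t in, int64_t pad_begin, int64_t pad_end,
                         int64_t kernel, int64_t stride, int64_t dilation, bool ceil_mode) {
    const double out =
        static_cast<double>(in + pad_begin + pad_end - ((kernel - 1) * dilation + 1)) / stride + 1;
    return static_cast<int64_t>(ceil_mode ? std::ceil(out) : std::floor(out));
}

// Example 1 below: max_pool_out_dim(3, 1, 1, 2, 1, 1, false) == 4, matching the 4x4 output.
```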
Example 1 shows how *MaxPool* operates with 4D input using 2D kernel and `auto_pad = explicit`.
```
input = [[[[-1, 2, 3],
[4, 5, -6],
[-7, 8, 9]]]]
strides = [1, 1]
pads_begin = [1, 1]
pads_end = [1, 1]
kernel = [2, 2]
rounding_type = "floor"
auto_pad = "explicit"
output0 = [[[[-1, 2, 3, 3],
[4, 5, 5, -6],
[4, 8, 9, 9],
[-7, 8, 9, 9]]]]
output1 = [[[[0, 1, 2, 2],
[3, 4, 4, 5],
[3, 7, 8, 8],
[6, 7, 8, 8]]]]
```
Example 2 shows how *MaxPool* operates with 3D input using 1D kernel and `auto_pad = valid`.
```
input = [[[-1, 2, 3, 5, -7, 9, 1]]]
strides = [1]
kernel = [3]
rounding_type = "floor"
auto_pad = "valid"
output0 = [[[3, 5, 5, 9, 9]]]
output1 = [[[2, 3, 3, 5, 5]]]
```
Example 3 shows how *MaxPool* operates with 4D input using 2D kernel and `auto_pad = same_lower`.
```
input = [[[[-1, 2, 3],
[4, 5, -6],
[-7, 8, 9]]]]
strides = [1, 1]
kernel = [2, 2]
rounding_type = "floor"
auto_pad = "same_lower"
output0 = [[[[-1, 2, 3],
[4, 5, 5]
[4, 8, 9]]]]
output1 = [[[[0, 1, 2],
[3, 4, 4]
[3, 7, 8]]]]
```
Example 4 shows how *MaxPool* operates with 4D input using 2D kernel and `auto_pad = same_upper`.
```
input = [[[[-1, 2, 3],
[4, 5, -6],
[-7, 8, 9]],
[[2, -1, 5],
[6, -7, 1],
[8, 2, -3]]]]
strides = [1, 1]
kernel = [2, 2]
rounding_type = "floor"
auto_pad = "same_upper"
output0 = [[[[5, 5, 3],
[8, 9, 9]
[8, 9, 9]],
[[6, 5, 5],
[8, 2, 1],
[8, 2, -3]]]]
output1 = [[[[4, 4, 2],
[7, 8, 8]
[7, 8, 8]],
[[12, 11, 11],
[15, 16, 14],
[15, 16, 17]]]]
```
Example 5 shows how *MaxPool* operates with 4D input using 2D kernel, `auto_pad = valid` and `rounding_type = ceil`.
```
input = [[[[-1, 2, 3],
[4, 5, -6],
[-7, 8, 9]]]]
strides = [2, 2]
kernel = [2, 2]
rounding_type = "ceil"
auto_pad = "valid"
output0 = [[[[5, 3],
[8, 9]]]]
output1 = [[[[4, 2],
[7, 8]]]]
```
Example 6 shows how *MaxPool* operates on 4D input using dilated 2D kernel, `auto_pad = explicit` and `rounding_type = floor`.
```
input = [[[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]]]]
strides = [1, 1]
kernel = [2, 2]
dilations = [2, 2]
rounding_type = "floor"
auto_pad = "explicit"
pads_begin = [1, 1]
pads_end = [1, 1]
output0 = [[[[5, 6, 5],
[8, 9, 8],
[5, 6, 5]]]]
output1 = [[[[4, 5, 4],
[7, 8, 7],
[4, 5, 4]]]]
```
Example 7 shows how *MaxPool* operates on 4D input using 2D kernel, with non-default `axis` value.
```
input = [[[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]],
[[10, 11, 12],
[13, 14, 15],
[16, 17, 18]]
]]
strides = [1, 1]
kernel = [2, 2]
dilations = [1, 1]
rounding_type = "floor"
auto_pad = "explicit"
pads_begin = [0, 0]
pads_end = [0, 0]
axis = 2
output0 = [[[[5, 6],
[8, 9]],
[[14, 15],
[17, 18]]]]
output1 = [[[[4, 5],
[7, 8]],
[[4, 5],
[7, 8]]]]
```
**Examples**
```xml
<layer ... type="MaxPool" ... >
<data auto_pad="same_upper" kernel="2,2" pads_begin="1,1" pads_end="1,1" strides="2,2"/>
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>32</dim>
<dim>32</dim>
</port>
</input>
<output>
<port id="1">
<dim>1</dim>
<dim>3</dim>
<dim>32</dim>
<dim>32</dim>
</port>
<port id="2">
<dim>1</dim>
<dim>3</dim>
<dim>32</dim>
<dim>32</dim>
</port>
</output>
</layer>
<layer ... type="MaxPool" ... >
<data auto_pad="explicit" kernel="2,2" pads_begin="1,1" pads_end="1,1" strides="2,2"/>
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>32</dim>
<dim>32</dim>
</port>
</input>
<output>
<port id="1">
<dim>1</dim>
<dim>3</dim>
<dim>17</dim>
<dim>17</dim>
</port>
<port id="2">
<dim>1</dim>
<dim>3</dim>
<dim>17</dim>
<dim>17</dim>
</port>
</output>
</layer>
<layer ... type="MaxPool" ... >
<data auto_pad="valid" kernel="2,2" pads_begin="1,1" pads_end="1,1" strides="2,2"/>
<input>
<port id="0">
<dim>1</dim>
<dim>3</dim>
<dim>32</dim>
<dim>32</dim>
</port>
</input>
<output>
<port id="1">
<dim>1</dim>
<dim>3</dim>
<dim>16</dim>
<dim>16</dim>
</port>
<port id="2">
<dim>1</dim>
<dim>3</dim>
<dim>16</dim>
<dim>16</dim>
</port>
</output>
</layer>
```

docs/snippets/AUTO0.cpp (new file, 12 lines)
View File

@ -0,0 +1,12 @@
#include <ie_core.hpp>
int main() {
//! [part0]
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
// these 2 lines below are equivalent
InferenceEngine::ExecutableNetwork exec0 = ie.LoadNetwork(network, "AUTO");
InferenceEngine::ExecutableNetwork exec1 = ie.LoadNetwork(network, "");
//! [part0]
return 0;
}

docs/snippets/AUTO1.cpp (new file, 15 lines)
View File

@ -0,0 +1,15 @@
#include <ie_core.hpp>
int main() {
//! [part1]
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
// "AUTO" plugin is (globally) pre-configured with the explicit option:
ie.SetConfig({{"AUTO_DEVICE_LIST", "CPU,GPU"}}, "AUTO");
// the below 3 lines are equivalent (the first line leverages the pre-configured AUTO, while second and third explicitly pass the same settings)
InferenceEngine::ExecutableNetwork exec0 = ie.LoadNetwork(network, "AUTO", {});
InferenceEngine::ExecutableNetwork exec1 = ie.LoadNetwork(network, "AUTO", {{"AUTO_DEVICE_LIST", "CPU,GPU"}});
InferenceEngine::ExecutableNetwork exec2 = ie.LoadNetwork(network, "AUTO:CPU,GPU");
//! [part1]
return 0;
}

docs/snippets/AUTO2.cpp (new file, 10 lines)
View File

@ -0,0 +1,10 @@
#include <ie_core.hpp>
int main() {
//! [part2]
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "AUTO");
//! [part2]
return 0;
}

docs/snippets/AUTO3.cpp (new file, 10 lines)
View File

@ -0,0 +1,10 @@
#include <ie_core.hpp>
int main() {
//! [part3]
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "AUTO:CPU,GPU");
//! [part3]
return 0;
}

docs/snippets/AUTO4.cpp (new file, 19 lines)
View File

@ -0,0 +1,19 @@
#include <ie_core.hpp>
int main() {
const std::map<std::string, std::string> cpu_config = { { InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, InferenceEngine::PluginConfigParams::YES } };
const std::map<std::string, std::string> gpu_config = { { InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, InferenceEngine::PluginConfigParams::YES } };
//! [part4]
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
// configure the CPU device first
ie.SetConfig(cpu_config, "CPU");
// configure the GPU device
ie.SetConfig(gpu_config, "GPU");
// load the network to the auto-device
InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "AUTO");
// the new metric allows querying the optimization capabilities
std::vector<std::string> device_cap = exeNetwork.GetMetric(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
//! [part4]
return 0;
}

docs/snippets/AUTO5.cpp

@ -0,0 +1,15 @@
#include <ie_core.hpp>
int main() {
std::string device_name = "AUTO:CPU,GPU";
const std::map< std::string, std::string > full_config = {};
//! [part5]
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork network = ie.ReadNetwork("sample.xml");
// 'device_name' can be "AUTO:CPU,GPU" to configure the auto-device to use CPU and GPU
InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, device_name, full_config);
// the new metric allows querying the optimization capabilities
std::vector<std::string> device_cap = exeNetwork.GetMetric(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
//! [part5]
return 0;
}


@ -0,0 +1,84 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <ngraph/ngraph.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>
#include "comparison.hpp"
using namespace ngraph;
using namespace InferenceEngine;
using ComparisonTypes = ngraph::helpers::ComparisonTypes;
namespace reference_tests {
namespace ComparisonOpsRefTestDefinitions {
namespace {
TEST_P(ReferenceComparisonLayerTest, GreaterCompareWithHardcodedRefs) {
Exec();
}
template <element::Type_t IN_ET>
std::vector<RefComparisonParams> generateComparisonParams(const element::Type& type) {
using T = typename element_type_traits<IN_ET>::value_type;
std::vector<RefComparisonParams> compParams {
// 1D // 2D // 3D // 4D
Builder {}
.compType(ComparisonTypes::GREATER)
.input1({{2, 2}, type, std::vector<T> {0, 12, 23, 0}})
.input2({{2, 2}, type, std::vector<T> {0, 12, 23, 0}})
.expected({{2, 2}, element::boolean, std::vector<char> {0, 0, 0, 0}}),
Builder {}
.compType(ComparisonTypes::GREATER)
.input1({{2, 3}, type, std::vector<T> {0, 6, 45, 1, 21, 21}})
.input2({{2, 3}, type, std::vector<T> {1, 18, 23, 1, 19, 21}})
.expected({{2, 3}, element::boolean, std::vector<char> {0, 0, 1, 0, 1, 0}}),
Builder {}
.compType(ComparisonTypes::GREATER)
.input1({{1}, type, std::vector<T> {53}})
.input2({{1}, type, std::vector<T> {53}})
.expected({{1}, element::boolean, std::vector<char> {0}}),
Builder {}
.compType(ComparisonTypes::GREATER)
.input1({{2, 4}, type, std::vector<T> {0, 12, 23, 0, 1, 5, 12, 8}})
.input2({{2, 4}, type, std::vector<T> {0, 12, 23, 0, 10, 5, 11, 8}})
.expected({{2, 4}, element::boolean, std::vector<char> {0, 0, 0, 0, 0, 0, 1, 0}}),
Builder {}
.compType(ComparisonTypes::GREATER)
.input1({{3, 1, 2}, type, std::vector<T> {2, 1, 4, 1, 3, 1}})
.input2({{1, 2, 1}, type, std::vector<T> {1, 1}})
.expected({{3, 2, 2}, element::boolean, std::vector<char> {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}}),
Builder {}
.compType(ComparisonTypes::GREATER)
.input1({{2, 1, 2, 1}, type, std::vector<T> {2, 1, 4, 1}})
.input2({{1, 2, 1}, type, std::vector<T> {1, 1}})
.expected({{2, 1, 2, 1}, element::boolean, std::vector<char> {1, 0, 1, 0}})};
return compParams;
}
std::vector<RefComparisonParams> generateComparisonCombinedParams() {
const std::vector<std::vector<RefComparisonParams>> compTypeParams {
generateComparisonParams<element::Type_t::f32>(element::f32),
generateComparisonParams<element::Type_t::f16>(element::f16),
generateComparisonParams<element::Type_t::i32>(element::i32),
generateComparisonParams<element::Type_t::i64>(element::i64),
generateComparisonParams<element::Type_t::u32>(element::u32),
generateComparisonParams<element::Type_t::u64>(element::u64),
generateComparisonParams<element::Type_t::boolean>(element::boolean)};
std::vector<RefComparisonParams> combinedParams;
for (const auto& params : compTypeParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
} // namespace
INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
ReferenceComparisonLayerTest::getTestCaseName);
} // namespace ComparisonOpsRefTestDefinitions
} // namespace reference_tests


@ -0,0 +1,48 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <ie_ngraph_utils.hpp>
#include <ngraph/ngraph.hpp>
#include <shared_test_classes/base/layer_test_utils.hpp>
#include <tuple>
#include "logical.hpp"
using namespace ngraph;
using namespace InferenceEngine;
using LogicalTypes = ngraph::helpers::LogicalTypes;
namespace reference_tests {
namespace LogicalOpsRefTestDefinitions {
namespace {
std::vector<RefLogicalParams> generateLogicalParams() {
std::vector<RefLogicalParams> logicalParams {
Builder {}
.opType(LogicalTypes::LOGICAL_XOR)
.input1({{2, 2}, element::boolean, std::vector<char> {true, false, true, false}})
.input2({{2, 2}, element::boolean, std::vector<char> {false, true, true, false}})
.expected({{2, 2}, element::boolean, std::vector<char> {true, true, false, false}}),
Builder {}
.opType(LogicalTypes::LOGICAL_XOR)
.input1({{2, 1, 2, 1}, element::boolean, std::vector<char> {true, false, true, false}})
.input2({{1, 1, 2, 1}, element::boolean, std::vector<char> {true, false}})
.expected({{2, 1, 2, 1}, element::boolean, std::vector<char> {false, false, false, false}}),
Builder {}
.opType(LogicalTypes::LOGICAL_XOR)
.input1({{3, 4}, element::boolean, std::vector<char> {true, true, true, true, true, false, true, false, false, true, true, true}})
.input2({{3, 4}, element::boolean, std::vector<char> {true, true, true, true, true, true, true, false, false, true, true, false}})
.expected({{3, 4}, element::boolean, std::vector<char> {false, false, false, false, false, true, false, false, false, false, false, true}})};
return logicalParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_LogicalOr_With_Hardcoded_Refs, ReferenceLogicalLayerTest, ::testing::ValuesIn(generateLogicalParams()),
ReferenceLogicalLayerTest::getTestCaseName);
} // namespace
} // namespace LogicalOpsRefTestDefinitions
} // namespace reference_tests
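The second parameter set above appears to rely on numpy-style broadcasting of the `{1, 1, 2, 1}` input across the first dimension of the `{2, 1, 2, 1}` input, so every element pairs with an equal value and the XOR comes out all-false:
```
input1, slice by slice:           [true, false] | [true, false]
input2 broadcast to both slices:  [true, false] | [true, false]
element-wise XOR:                 [false, false] | [false, false]
```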


@ -7,10 +7,10 @@ import sys
import errno import errno
import subprocess # nosec import subprocess # nosec
import typing import typing
import multiprocessing
from fnmatch import fnmatchcase from fnmatch import fnmatchcase
from pathlib import Path from pathlib import Path
from shutil import copyfile, rmtree from shutil import copyfile, rmtree
from distutils.command.install import install
from distutils.command.build import build from distutils.command.build import build
from distutils.command.clean import clean from distutils.command.clean import clean
from distutils.errors import DistutilsSetupError from distutils.errors import DistutilsSetupError
@ -27,11 +27,11 @@ PYTHON_VERSION = f'python{sys.version_info.major}.{sys.version_info.minor}'
# The following variables can be defined in environment or .env file # The following variables can be defined in environment or .env file
CMAKE_BUILD_DIR = config('CMAKE_BUILD_DIR', '.') CMAKE_BUILD_DIR = config('CMAKE_BUILD_DIR', '.')
CORE_LIBS_DIR = config('CORE_LIBS_DIR', '') CORE_LIBS_DIR = config('CORE_LIBS_DIR', 'deployment_tools/inference_engine/lib/intel64')
PLUGINS_LIBS_DIR = config('PLUGINS_LIBS_DIR', '') PLUGINS_LIBS_DIR = config('PLUGINS_LIBS_DIR', 'deployment_tools/inference_engine/lib/intel64')
NGRAPH_LIBS_DIR = config('NGRAPH_LIBS_DIR', '') NGRAPH_LIBS_DIR = config('NGRAPH_LIBS_DIR', 'deployment_tools/ngraph/lib')
TBB_LIBS_DIR = config('TBB_LIBS_DIR', '') TBB_LIBS_DIR = config('TBB_LIBS_DIR', 'deployment_tools/inference_engine/external/tbb/lib')
PY_PACKAGES_DIR = config('PY_PACKAGES_DIR', '') PY_PACKAGES_DIR = config('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}')
LIBS_RPATH = '$ORIGIN' if sys.platform == 'linux' else '@loader_path' LIBS_RPATH = '$ORIGIN' if sys.platform == 'linux' else '@loader_path'
LIB_INSTALL_CFG = { LIB_INSTALL_CFG = {
@ -118,7 +118,66 @@ class PrebuiltExtension(Extension):
class CustomBuild(build): class CustomBuild(build):
"""Custom implementation of build_clib""" """Custom implementation of build_clib"""
cmake_build_types = ['Release', 'Debug', 'RelWithDebInfo', 'MinSizeRel']
user_options = [
('config=', None, 'Build configuration [{types}].'.format(types='|'.join(cmake_build_types))),
('jobs=', None, 'Specifies the number of jobs to use with make.'),
('cmake-args=', None, 'Additional options to be passed to CMake.'),
]
def initialize_options(self):
"""Set default values for all the options that this command supports."""
super().initialize_options()
self.build_base = 'build'
self.config = None
self.jobs = None
self.cmake_args = None
def finalize_options(self):
"""Set final values for all the options that this command supports."""
super().finalize_options()
if not self.config:
if self.debug:
self.config = 'Debug'
else:
self.announce('Set default value for CMAKE_BUILD_TYPE = Release.', level=4)
self.config = 'Release'
else:
build_types = [item.lower() for item in self.cmake_build_types]
try:
i = build_types.index(str(self.config).lower())
self.config = self.cmake_build_types[i]
self.debug = True if 'Debug' == self.config else False
except ValueError:
self.announce('Unsupported CMAKE_BUILD_TYPE value: ' + self.config, level=4)
self.announce('Supported values: {types}'.format(types=', '.join(self.cmake_build_types)), level=4)
sys.exit(1)
if self.jobs is None and os.getenv('MAX_JOBS') is not None:
self.jobs = os.getenv('MAX_JOBS')
self.jobs = multiprocessing.cpu_count() if self.jobs is None else int(self.jobs)
def run(self): def run(self):
global CMAKE_BUILD_DIR
self.jobs = multiprocessing.cpu_count()
plat_specifier = '.{0}-{1}.{2}'.format(self.plat_name, *sys.version_info[:2])
self.build_temp = os.path.join(self.build_base, 'temp' + plat_specifier, self.config)
# if setup.py is directly called use CMake to build product
if CMAKE_BUILD_DIR == '.':
openvino_root_dir = os.path.normpath(os.path.join(CMAKE_BUILD_DIR, '../../../../'))
self.announce('Configuring cmake project', level=3)
self.spawn(['cmake', '-H' + openvino_root_dir, '-B' + self.build_temp,
'-DCMAKE_BUILD_TYPE={type}'.format(type=self.config),
'-DENABLE_PYTHON=ON',
'-DNGRAPH_ONNX_FRONTEND_ENABLE=ON'])
self.announce('Building binaries', level=3)
self.spawn(['cmake', '--build', self.build_temp,
'--config', self.config, '-j', str(self.jobs)])
CMAKE_BUILD_DIR = self.build_temp
self.run_command('build_clib') self.run_command('build_clib')
build.run(self) build.run(self)
# Copy extra package_data content filtered by find_packages # Copy extra package_data content filtered by find_packages
@ -133,14 +192,6 @@ class CustomBuild(build):
copyfile(path, dst / path_rel) copyfile(path, dst / path_rel)
class CustomInstall(install):
"""Enable build_clib during the installation"""
def run(self):
self.run_command('build_clib')
install.run(self)
class PrepareLibs(build_clib): class PrepareLibs(build_clib):
"""Prepare prebuilt libraries""" """Prepare prebuilt libraries"""
@ -369,6 +420,7 @@ if os.path.exists(package_license):
packages = find_namespace_packages(get_package_dir(PY_INSTALL_CFG)) packages = find_namespace_packages(get_package_dir(PY_INSTALL_CFG))
package_data: typing.Dict[str, list] = {} package_data: typing.Dict[str, list] = {}
setup( setup(
version=config('WHEEL_VERSION', '0.0.0'), version=config('WHEEL_VERSION', '0.0.0'),
author_email=config('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'), author_email=config('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
@ -376,14 +428,13 @@ setup(
license=config('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'), license=config('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
author=config('WHEEL_AUTHOR', 'Intel Corporation'), author=config('WHEEL_AUTHOR', 'Intel Corporation'),
description=config('WHEEL_DESC', 'Inference Engine Python* API'), description=config('WHEEL_DESC', 'Inference Engine Python* API'),
install_requires=get_dependencies(config('WHEEL_REQUIREMENTS', 'requirements.txt')), install_requires=get_dependencies(config('WHEEL_REQUIREMENTS', 'meta/openvino.requirements.txt')),
long_description=get_description(config('WHEEL_OVERVIEW', 'pypi_overview.md')), long_description=get_description(config('WHEEL_OVERVIEW', 'meta/pypi_overview.md')),
long_description_content_type='text/markdown', long_description_content_type='text/markdown',
download_url=config('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'), download_url=config('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
url=config('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'), url=config('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
cmdclass={ cmdclass={
'build': CustomBuild, 'build': CustomBuild,
'install': CustomInstall,
'build_clib': PrepareLibs, 'build_clib': PrepareLibs,
'build_ext': CopyExt, 'build_ext': CopyExt,
'clean': CustomClean, 'clean': CustomClean,
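Taken together, the new `CustomBuild` options make the wheel build configurable from the command line, e.g. `python setup.py build --config=Debug --jobs=8` (an illustrative invocation; the option names come from `user_options` above), with the `MAX_JOBS` environment variable acting as a fallback for `--jobs`.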


@ -212,6 +212,9 @@ int main(int argc, char* argv[]) {
bool perf_counts = false; bool perf_counts = false;
// Update config per device according to command line parameters // Update config per device according to command line parameters
for (auto& device : devices) { for (auto& device : devices) {
if (device == "AUTO") {
continue;
}
if (!config.count(device)) if (!config.count(device))
config[device] = {}; config[device] = {};
std::map<std::string, std::string>& device_config = config.at(device); std::map<std::string, std::string>& device_config = config.at(device);


@ -627,10 +627,9 @@ int main(int argc, char* argv[]) {
if (FLAGS_q.compare("user") == 0) { if (FLAGS_q.compare("user") == 0) {
if (!FLAGS_rg.empty()) { if (!FLAGS_rg.empty()) {
slog::warn slog::warn << "Custom scale factor will be used for imported gna model: " << FLAGS_rg << slog::endl;
<< "Custom scale factor will be ignored - using scale factor from provided imported gna model: " }
<< FLAGS_rg << slog::endl;
} else {
auto scaleFactorInput = ParseScaleFactors(FLAGS_sf); auto scaleFactorInput = ParseScaleFactors(FLAGS_sf);
if (numInputFiles != scaleFactorInput.size()) { if (numInputFiles != scaleFactorInput.size()) {
std::string errMessage( std::string errMessage(
@ -641,11 +640,9 @@ int main(int argc, char* argv[]) {
for (size_t i = 0; i < scaleFactorInput.size(); ++i) { for (size_t i = 0; i < scaleFactorInput.size(); ++i) {
slog::info << "For input " << i << " using scale factor of " << scaleFactorInput[i] << slog::endl; slog::info << "For input " << i << " using scale factor of " << scaleFactorInput[i] << slog::endl;
std::string scaleFactorConfigKey = std::string scaleFactorConfigKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_") + std::to_string(i);
GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_") + std::to_string(i);
gnaPluginConfig[scaleFactorConfigKey] = scaleFactorInput[i]; gnaPluginConfig[scaleFactorConfigKey] = scaleFactorInput[i];
} }
}
} else { } else {
// "static" quantization with calculated scale factor // "static" quantization with calculated scale factor
if (!FLAGS_rg.empty()) { if (!FLAGS_rg.empty()) {


@ -1,136 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include <memory>
#include <map>
#include "ie_metric_helpers.hpp"
#include "auto_exec_network.hpp"
#include "auto_infer_request.hpp"
namespace AutoPlugin {
using namespace InferenceEngine;
AutoExecutableNetwork::AutoExecutableNetwork(NetworkFuture cpuFuture,
NetworkFuture acceleratorFuture,
bool enablePerfCount)
: _cpuFuture(std::move(cpuFuture))
, _acceleratorFuture(std::move(acceleratorFuture))
, _enablePerfCount(enablePerfCount) {
// both are valid, like AUTO:CPU,GPU
if (_cpuFuture.valid() && _acceleratorFuture.valid()) {
try {
_networkFirstReady = _cpuFuture.get();
_alreadyActualNetwork = false;
} catch (const std::exception& e) {
printf("Warning: load network to CPU failed: %s\n", e.what());
_networkActualNeeded = _acceleratorFuture.get();
_alreadyActualNetwork = true;
}
} else if (_acceleratorFuture.valid()) { // only accelerator is valid, like AUTO:GPU
_networkActualNeeded = _acceleratorFuture.get();
_alreadyActualNetwork = true;
} else if (_cpuFuture.valid()) { // only CPU is valid, like AUTO:CPU
_networkActualNeeded = _cpuFuture.get();
_alreadyActualNetwork = true;
} else {
IE_THROW() << "No device task available";
}
}
AutoExecutableNetwork::~AutoExecutableNetwork() = default;
InferenceEngine::IInferRequestInternal::Ptr AutoExecutableNetwork::CreateInferRequestImpl(InputsDataMap networkInputs,
OutputsDataMap networkOutputs) {
InferenceEngine::SoExecutableNetworkInternal network;
SoIInferRequestInternal inferRequest;
if (TryGetActualNetwork(network)) {
inferRequest = {_networkActualNeeded, _networkActualNeeded->CreateInferRequest()};
} else {
inferRequest = {_networkFirstReady, _networkFirstReady->CreateInferRequest()};
}
return std::make_shared<AutoInferRequest>(_networkInputs, _networkOutputs, inferRequest,
shared_from_this(), _alreadyActualNetwork,
_enablePerfCount);
}
bool AutoExecutableNetwork::TryGetActualNetwork(InferenceEngine::SoExecutableNetworkInternal& soExecNetwork) {
// try to get actual network
if (_acceleratorFuture.valid() && _acceleratorFuture.wait_for(std::chrono::nanoseconds(0)) == std::future_status::ready) {
soExecNetwork = _acceleratorFuture.get();
_alreadyActualNetwork = true;
_networkActualNeeded = soExecNetwork;
// reapply config to actual network
// fixme: GPU doesn't support SetConfig and throw exception
try {
_networkActualNeeded->SetConfig(_cacheConfig);
} catch (...) {
}
return true;
}
// if already get actual network
if (_alreadyActualNetwork) {
soExecNetwork = _networkActualNeeded;
return true;
}
return false;
}
void AutoExecutableNetwork::WaitForActualDevice() const {
if (_alreadyActualNetwork) {
return;
}
if (_acceleratorFuture.valid()) {
_networkActualNeeded = _acceleratorFuture.get();
_alreadyActualNetwork = true;
} else {
IE_THROW() << "Export failed due to no valid executable network";
}
}
void AutoExecutableNetwork::Export(std::ostream& networkModel) {
//fixme: the Export should work with actual device, so we have to wait!!!
WaitForActualDevice();
_networkActualNeeded->Export(networkModel);
}
RemoteContext::Ptr AutoExecutableNetwork::GetContext() const {
// fixme: the GetContext should work with actual device, so we have to wait!!!
WaitForActualDevice();
return _networkActualNeeded->GetContext();
}
InferenceEngine::CNNNetwork AutoExecutableNetwork::GetExecGraphInfo() {
WaitForActualDevice();
return _networkActualNeeded->GetExecGraphInfo();
}
Parameter AutoExecutableNetwork::GetMetric(const std::string &name) const {
// fixme: should we wait actual device? meanwhile it will block inference, how to fix?
// WaitForActualDevice();
if (_alreadyActualNetwork) {
return _networkActualNeeded->GetMetric(name);
} else {
return _networkFirstReady->GetMetric(name);
}
}
void AutoExecutableNetwork::SetConfig(const std::map<std::string, Parameter>& config) {
//fixme: have to store the config and reapply when the networks swapped
_cacheConfig = config;
if (_alreadyActualNetwork) {
_networkActualNeeded->SetConfig(config);
} else {
_networkFirstReady->SetConfig(config);
}
}
Parameter AutoExecutableNetwork::GetConfig(const std::string& name) const {
//fixme: carefuly select between FirstLoaded and ActuallyNeeded
return _cacheConfig;
}
} // namespace AutoPlugin


@ -1,56 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <atomic>
#include <mutex>
#include <queue>
#include <unordered_map>
#include <map>
#include <vector>
#include <string>
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include <threading/ie_itask_executor.hpp>
namespace AutoPlugin {
using DeviceName = std::string;
using NetworkFuture = std::future<InferenceEngine::SoExecutableNetworkInternal>;
class AutoExecutableNetwork : public InferenceEngine::IExecutableNetworkInternal {
public:
using Ptr = std::shared_ptr<AutoExecutableNetwork>;
explicit AutoExecutableNetwork(NetworkFuture cpuTask,
NetworkFuture acceleratorTask,
bool enablePerfCount);
void Export(std::ostream& networkModel) override;
InferenceEngine::RemoteContext::Ptr GetContext() const override;
InferenceEngine::CNNNetwork GetExecGraphInfo() override;
InferenceEngine::Parameter GetMetric(const std::string &name) const override;
void SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) override;
InferenceEngine::Parameter GetConfig(const std::string& name) const override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) override;
bool TryGetActualNetwork(InferenceEngine::SoExecutableNetworkInternal& soExecNetwork);
~AutoExecutableNetwork();
private:
void WaitForActualDevice() const;
private:
InferenceEngine::SoExecutableNetworkInternal _networkFirstReady;
mutable InferenceEngine::SoExecutableNetworkInternal _networkActualNeeded;
NetworkFuture _cpuFuture;
mutable NetworkFuture _acceleratorFuture;
bool _enablePerfCount;
mutable std::atomic<bool> _alreadyActualNetwork = {false};
std::map<std::string, InferenceEngine::Parameter> _cacheConfig;
};
} // namespace AutoPlugin


@ -1,103 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <iostream>
#include "auto_infer_request.hpp"
#include <ie_input_info.hpp>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
namespace AutoPlugin {
using namespace InferenceEngine;
AutoInferRequest::AutoInferRequest(const InputsDataMap& networkInputs,
const OutputsDataMap& networkOutputs,
const SoIInferRequestInternal& inferRequest,
const InferenceEngine::IExecutableNetworkInternal::Ptr autoExecutableNetwork,
bool alreadyActualNetwork,
bool enablePerfCount)
: IInferRequestInternal(networkInputs, networkOutputs)
, _inferRequest(inferRequest)
, _autoExecutableNetwork(std::dynamic_pointer_cast<AutoPlugin::AutoExecutableNetwork>(autoExecutableNetwork))
, _alreadyActualNetwork(alreadyActualNetwork)
, _enablePerfCount(enablePerfCount) {
IE_ASSERT(_autoExecutableNetwork != nullptr);
for (const auto &it : _networkInputs)
_inputs[it.first] = _inferRequest->GetBlob(it.first);
for (const auto &it : _networkOutputs)
_outputs[it.first] = _inferRequest->GetBlob(it.first);
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> AutoInferRequest::GetPerformanceCounts() const {
if (_enablePerfCount) {
try {
return _inferRequest->GetPerformanceCounts();
} catch (...) {
return {};
}
} else {
return {};
}
}
void AutoInferRequest::InferImpl() {
HotSwapRequests(); //safe to call here (before actual inference started)
SetBlobsToDeviceRequest();
_inferRequest->Infer();
}
void AutoInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) {
IInferRequestInternal::SetBlob(name, data);
}
Blob::Ptr AutoInferRequest::GetBlob(const std::string& name) {
return IInferRequestInternal::GetBlob(name);
}
void AutoInferRequest::Cancel() {
_inferRequest->Cancel();
}
void AutoInferRequest::StartAsync() {
HotSwapRequests(); //safe to call here (before actual inference started)
SetBlobsToDeviceRequest();
_inferRequest->StartAsync();
}
InferenceEngine::StatusCode AutoInferRequest::Wait(int64_t millis_timeout) {
return _inferRequest->Wait(millis_timeout);
}
void AutoInferRequest::SetCallback(Callback callback) {
_callback = callback;
_inferRequest->SetCallback(callback);
}
void AutoInferRequest::HotSwapRequests() {
if (!_alreadyActualNetwork) {
InferenceEngine::SoExecutableNetworkInternal tempSoExecNetwork;
if (_autoExecutableNetwork->TryGetActualNetwork(tempSoExecNetwork)) {
_alreadyActualNetwork = true;
_inferRequest = {tempSoExecNetwork, tempSoExecNetwork->CreateInferRequest()};
_inferRequest->SetCallback(_callback);
}
}
}
void AutoInferRequest::SetBlobsToDeviceRequest() {
for (const auto &it : _networkInputs) {
const auto &name = it.first;
// this assumes the request is already in BUSY state
auto blob = GetBlob(name);
if (_inferRequest->GetBlob(name) != blob)
_inferRequest->SetBlob(name, blob);
}
for (const auto &it : _networkOutputs) {
const auto &name = it.first;
// this assumes the request is already in BUSY state
auto blob = GetBlob(name);
if (_inferRequest->GetBlob(name) != blob)
_inferRequest->SetBlob(name, blob);
}
}
} // namespace AutoPlugin


@ -1,55 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <atomic>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#include <ie_blob.h>
#include <ie_common.h>
#include <map>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "auto_exec_network.hpp"
namespace AutoPlugin {
class AutoInferRequest : public InferenceEngine::IInferRequestInternal {
public:
using Ptr = std::shared_ptr<AutoInferRequest>;
explicit AutoInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
const InferenceEngine::OutputsDataMap& networkOutputs,
const InferenceEngine::SoIInferRequestInternal& inferRequest,
const InferenceEngine::IExecutableNetworkInternal::Ptr executeNetwork,
bool alreadyActualNetwork,
bool enablePerfCount);
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
void InferImpl() override;
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr& data) override;
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
void Cancel() override;
//async impl
void StartAsync() override;
InferenceEngine::StatusCode Wait(int64_t millis_timeout) override;
void SetCallback(Callback callback) override;
private:
void HotSwapRequests();
void SetBlobsToDeviceRequest();
private:
InferenceEngine::SoIInferRequestInternal _inferRequest;
AutoPlugin::AutoExecutableNetwork::Ptr _autoExecutableNetwork;
Callback _callback; // need to save the callback for hot-swap of the requests
bool _alreadyActualNetwork{ false };
bool _enablePerfCount { false };
};
} // namespace AutoPlugin


@ -2,397 +2,10 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
#include <string>
#include <vector>
#include <memory>
#include <map>
#include <unordered_set>
#include <ie_metric_helpers.hpp>
#include <threading/ie_executor_manager.hpp>
#include <ie_algorithm.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <transformations/utils/utils.hpp>
#include <ie_icore.hpp>
#include "auto_plugin.hpp" #include "auto_plugin.hpp"
#include "ngraph_ops/convolution_ie.hpp"
#include "ngraph_ops/deconvolution_ie.hpp"
namespace AutoPlugin { namespace AutoPlugin {
namespace {
std::string GetNetworkPrecision(const InferenceEngine::CNNNetwork &network) {
auto nGraphFunc = network.getFunction();
bool isINTModel = ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc);
if (isINTModel) {
return METRIC_VALUE(INT8);
}
for (auto & node : nGraphFunc->get_ordered_ops()) {
if (std::dynamic_pointer_cast<ngraph::opset1::Convolution>(node) ||
std::dynamic_pointer_cast<ngraph::opset1::GroupConvolution>(node) ||
std::dynamic_pointer_cast<ngraph::opset1::GroupConvolutionBackpropData>(node) ||
std::dynamic_pointer_cast<ngraph::opset1::ConvolutionBackpropData>(node) ||
std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node) ||
std::dynamic_pointer_cast<ngraph::op::DeconvolutionIE>(node)) {
auto layerType = node->input(1).get_element_type().get_type_name();
if (layerType == "f32")
return METRIC_VALUE(FP32);
if (layerType == "f16")
return METRIC_VALUE(FP16);
}
}
return METRIC_VALUE(FP32);
}
} // namespace
AutoInferencePlugin::AutoInferencePlugin() {
_pluginName = "AUTO";
}
IE::IExecutableNetworkInternal::Ptr AutoInferencePlugin::LoadNetwork(const std::string& fileName,
const ConfigType& config) {
return LoadNetworkImpl(fileName, {}, config);
}
IE::IExecutableNetworkInternal::Ptr AutoInferencePlugin::LoadExeNetworkImpl(const IE::CNNNetwork& network,
const ConfigType& config) {
if (network.getFunction() == nullptr) {
IE_THROW() << "AUTO device supports just ngraph network representation";
}
auto networkPrecision = GetNetworkPrecision(network);
return LoadNetworkImpl({}, network, config, networkPrecision);
}
std::shared_ptr<AutoExecutableNetwork> AutoInferencePlugin::LoadNetworkImpl(const std::string& modelPath,
const InferenceEngine::CNNNetwork& network,
const ConfigType& config,
const std::string& networkPrecision) {
if (GetCore() == nullptr) {
IE_THROW() << "Please, work with AUTO device via InferencEngine::Core object";
}
if (modelPath.empty() && network.getFunction() == nullptr) {
IE_THROW() << "AUTO device supports just ngraph network representation";
}
auto fullConfig = mergeConfigs(_config, config);
CheckConfig(fullConfig);
auto metaDevices = GetDeviceList(fullConfig);
auto core = GetCore(); // shared_ptr that holds the Core while the lambda below (which captures that by val) works
auto LoadNetworkAsync =
[core, modelPath, network](const std::string& device)
-> IE::SoExecutableNetworkInternal {
IE::SoExecutableNetworkInternal executableNetwork;
if (!modelPath.empty()) {
executableNetwork = core->LoadNetwork(modelPath, device, {});
} else {
executableNetwork = core->LoadNetwork(network, device, {});
}
return executableNetwork;
};
NetworkFuture cpuFuture;
NetworkFuture acceleratorFuture;
// start CPU task
const auto CPUIter = std::find_if(metaDevices.begin(), metaDevices.end(),
[=](const std::string& d)->bool{return d.find("CPU") != std::string::npos;});
if (CPUIter != metaDevices.end()) {
cpuFuture = std::async(std::launch::async, LoadNetworkAsync, *CPUIter);
}
// start accelerator task, like GPU
const auto accelerator = SelectDevice(metaDevices, networkPrecision);
bool isAccelerator = accelerator.find("CPU") == std::string::npos;
if (isAccelerator) {
acceleratorFuture = std::async(std::launch::async, LoadNetworkAsync, accelerator);
}
bool enablePerfCount = fullConfig.find(IE::PluginConfigParams::KEY_PERF_COUNT) != fullConfig.end();
return std::make_shared<AutoExecutableNetwork>(std::move(cpuFuture), std::move(acceleratorFuture), enablePerfCount);
}
IE::QueryNetworkResult AutoInferencePlugin::QueryNetwork(const IE::CNNNetwork& network, const ConfigType& config) const {
IE::QueryNetworkResult queryResult = {};
if (GetCore() == nullptr) {
IE_THROW() << "Please, work with AUTO device via InferencEngine::Core object";
}
if (network.getFunction() == nullptr) {
IE_THROW() << "AUTO device supports just ngraph network representation";
}
auto fullConfig = mergeConfigs(_config, config);
auto metaDevices = GetDeviceList(fullConfig);
std::unordered_set<std::string> supportedLayers;
for (auto&& value : metaDevices) {
try {
auto deviceQr = GetCore()->QueryNetwork(network, value, {});
std::unordered_set<std::string> deviceSupportedLayers;
for (auto &&layerQr : deviceQr.supportedLayersMap) {
deviceSupportedLayers.emplace(layerQr.first);
}
supportedLayers = supportedLayers.empty()
? deviceSupportedLayers : (deviceSupportedLayers.empty()
? supportedLayers : IE::details::Intersection(
supportedLayers, deviceSupportedLayers));
break;
} catch (...) {
}
}
for (auto&& supportedLayer : supportedLayers) {
queryResult.supportedLayersMap[supportedLayer] = GetName();
}
return queryResult;
}
IE::Parameter AutoInferencePlugin::GetConfig(const std::string& name,
const std::map<std::string, IE::Parameter> & options) const {
auto it = _config.find(name);
if (it == _config.end()) {
IE_THROW() << "Unsupported config key: " << name;
} else {
return { it->second };
}
}
void AutoInferencePlugin::SetConfig(const ConfigType& config) {
for (auto && kvp : config) {
if (kvp.first.find("AUTO_") == 0) {
_config[kvp.first] = kvp.second;
} else if (kvp.first == IE::PluginConfigParams::KEY_PERF_COUNT) {
if (kvp.second == IE::PluginConfigParams::YES ||
kvp.second == IE::PluginConfigParams::NO) {
_config[kvp.first] = kvp.second;
} else {
IE_THROW() << "Unsupported config value: " << kvp.second
<< " for key: " << kvp.first;
}
} else {
IE_THROW() << "Unsupported config key: " << kvp.first;
}
}
}
IE::Parameter AutoInferencePlugin::GetMetric(const std::string& name,
const std::map<std::string, IE::Parameter> & options) const {
if (name == METRIC_KEY(SUPPORTED_METRICS)) {
std::vector<std::string> metrics;
metrics.emplace_back(METRIC_KEY(SUPPORTED_METRICS));
metrics.emplace_back(METRIC_KEY(FULL_DEVICE_NAME));
metrics.emplace_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
metrics.emplace_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
} else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
std::string device_name = {"Inference Engine AUTO device"};
IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, device_name);
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
std::vector<std::string> configKeys = {
IE::KEY_AUTO_DEVICE_LIST,
IE::PluginConfigParams::KEY_PERF_COUNT
};
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
} else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
std::vector<std::string> capabilities = GetOptimizationCapabilities(options);
IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
} else {
IE_THROW() << "Unsupported metric key " << name;
}
}
//////////////////////////////////// private & protected functions ///////////////////
std::vector<DeviceName> AutoInferencePlugin::GetDeviceList(const ConfigType& config) const {
std::vector<DeviceName> deviceList;
auto deviceListConfig = config.find(IE::KEY_AUTO_DEVICE_LIST);
if (deviceListConfig == config.end()) {
deviceList = GetCore()->GetAvailableDevices();
} else {
deviceList = IE::DeviceIDParser::getHeteroDevices(deviceListConfig->second);
}
if (deviceList.empty()) {
IE_THROW() << "Please, check environment due to no supported devices can be used";
}
return deviceList;
}
std::vector<std::string> AutoInferencePlugin::GetOptimizationCapabilities(const std::map<std::string, IE::Parameter> & options) const {
// FIXME: workaround to get devicelist.
std::unordered_set<std::string> capabilities;
std::vector<std::string> queryDeviceLists{"CPU", "GPU"};
if (options.find(IE::KEY_AUTO_DEVICE_LIST) != options.end()) {
auto deviceListConfig = options.at(IE::KEY_AUTO_DEVICE_LIST).as<std::string>();
queryDeviceLists = IE::DeviceIDParser::getHeteroDevices(deviceListConfig);
} else if (_config.find(IE::KEY_AUTO_DEVICE_LIST) != _config.end()) {
auto deviceListConfig = _config.at(IE::KEY_AUTO_DEVICE_LIST);
queryDeviceLists = IE::DeviceIDParser::getHeteroDevices(deviceListConfig);
}
for (auto &item : queryDeviceLists) {
try {
std::vector<std::string> device_cap =
GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
for (auto &cap : device_cap) {
capabilities.insert(cap);
}
} catch (...) {
}
}
return {capabilities.begin(), capabilities.end()};
}
void AutoInferencePlugin::CheckConfig(const ConfigType& config) {
std::vector<std::string> supportedConfigKeys = GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS), {});
for (auto&& kvp : config) {
if (kvp.first.find("AUTO_") == 0) {
continue;
} else if (kvp.first == IE::PluginConfigParams::KEY_PERF_COUNT) {
if (kvp.second == IE::PluginConfigParams::YES ||
kvp.second == IE::PluginConfigParams::NO) {
continue;
} else {
IE_THROW() << "Unsupported config value: " << kvp.second
<< " for key: " << kvp.first;
}
} else {
IE_THROW() << "Unsupported config key: " << kvp.first;
}
}
}
DeviceName AutoInferencePlugin::SelectDevice(const std::vector<DeviceName>& metaDevices, const std::string& networkPrecision) {
if (metaDevices.empty()) {
IE_THROW(NotFound) << "No available device to select in AUTO plugin";
}
if (metaDevices.size() == 1) {
return metaDevices.at(0);
}
std::vector<DeviceName> CPU;
std::vector<DeviceName> dGPU;
std::vector<DeviceName> iGPU;
std::vector<DeviceName> MYRIAD;
std::vector<DeviceName> VPUX;
for (auto& item : metaDevices) {
if (item.find("CPU") == 0) {
CPU.push_back(item);
continue;
}
if (item.find("MYRIAD") == 0) {
MYRIAD.push_back(item);
continue;
}
if (item.find("VPUX") == 0) {
VPUX.push_back(item);
continue;
}
if (item.find("GPU") == 0) {
auto gpuFullDeviceName = GetCore()->GetMetric(item, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
if (gpuFullDeviceName.find("iGPU") != std::string::npos) {
iGPU.push_back(item);
} else if (gpuFullDeviceName.find("dGPU") != std::string::npos) {
dGPU.push_back(item);
}
continue;
}
}
if (CPU.empty() && dGPU.empty() && iGPU.empty() && MYRIAD.empty() && VPUX.empty()) {
IE_THROW(NotFound) << "No available device found";
}
// Priority of selecting device: dGPU > VPUX > iGPU > MYRIAD > CPU
if (!dGPU.empty()) {
for (auto&& item : dGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!VPUX.empty()) {
for (auto&& item : VPUX) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!iGPU.empty()) {
for (auto&& item : iGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!MYRIAD.empty()) {
for (auto&& item : MYRIAD) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
}
// If network is FP32 but there is no device support FP32, offload FP32 network to device support FP16.
if (networkPrecision == "FP32") {
if (!dGPU.empty()) {
for (auto&& item : dGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!VPUX.empty()) {
for (auto&& item : VPUX) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!iGPU.empty()) {
for (auto&& item : iGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!MYRIAD.empty()) {
for (auto&& item : MYRIAD) {
std::vector<std::string> capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
}
}
if (CPU.empty()) {
IE_THROW() << "Cannot select any device";
}
return CPU[0];
}
ConfigType AutoInferencePlugin::mergeConfigs(ConfigType config, const ConfigType& local) {
for (auto && kvp : local) {
config[kvp.first] = kvp.second;
}
return config;
}
// define CreatePluginEngine to create plugin instance // define CreatePluginEngine to create plugin instance
static const IE::Version version = {{2, 1}, CI_BUILD_NUMBER, "AutoPlugin"}; static const InferenceEngine::Version version = {{2, 1}, CI_BUILD_NUMBER, "AutoPlugin"};
IE_DEFINE_PLUGIN_CREATE_FUNCTION(AutoInferencePlugin, version) IE_DEFINE_PLUGIN_CREATE_FUNCTION(AutoInferencePlugin, version)
} // namespace AutoPlugin } // namespace AutoPlugin


@ -4,43 +4,14 @@
#pragma once #pragma once
#include <map>
#include <vector>
#include <string>
#include <unordered_set>
#include <type_traits>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp> #include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp> #include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include <threading/ie_executor_manager.hpp>
#include "auto_exec_network.hpp"
namespace AutoPlugin { namespace AutoPlugin {
namespace IE = InferenceEngine; class AutoInferencePlugin : public InferenceEngine::IInferencePlugin {
using ConfigType = std::map<std::string, std::string>;
class AutoInferencePlugin : public IE::IInferencePlugin {
public: public:
AutoInferencePlugin(); AutoInferencePlugin() = default;
~AutoInferencePlugin() = default; ~AutoInferencePlugin() = default;
IE::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const IE::CNNNetwork& network, const ConfigType& config) override;
IE::IExecutableNetworkInternal::Ptr LoadNetwork(const std::string& fileName, const ConfigType& config) override;
IE::QueryNetworkResult QueryNetwork(const IE::CNNNetwork& network, const ConfigType& config) const override;
IE::Parameter GetMetric(const std::string& name, const std::map<std::string, IE::Parameter>& options) const override;
IE::Parameter GetConfig(const std::string& name, const std::map<std::string, IE::Parameter> & options) const override;
void SetConfig(const ConfigType& config) override;
private:
std::shared_ptr<AutoExecutableNetwork> LoadNetworkImpl(const std::string& modelPath,
const InferenceEngine::CNNNetwork& network,
const ConfigType &config,
const std::string &networkPrecision = METRIC_VALUE(FP32));
std::vector<DeviceName> GetDeviceList(const ConfigType& config) const;
std::vector<std::string> GetOptimizationCapabilities(const std::map<std::string, IE::Parameter>& options) const;
DeviceName SelectDevice(const std::vector<DeviceName>& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32));
void CheckConfig(const ConfigType& config);
static ConfigType mergeConfigs(ConfigType config, const ConfigType& local);
}; };
} // namespace AutoPlugin } // namespace AutoPlugin


@ -60,6 +60,7 @@
#include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp> #include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp>
#include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp> #include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp>
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp> #include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
#include <transformations/op_conversions/convert_gather_downgrade.hpp>
#include <transformations/op_conversions/convert_gather_0d.hpp> #include <transformations/op_conversions/convert_gather_0d.hpp>
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp> #include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
#include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp> #include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
@ -362,6 +363,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
pass_config->disable<ngraph::pass::ConvertBroadcast3>(); pass_config->disable<ngraph::pass::ConvertBroadcast3>();
pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>(); pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>();
pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>(); pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
pass_config->enable<ngraph::pass::ConvertGather8ToGather7>();
if (!config.enable_loop_unrolling) { if (!config.enable_loop_unrolling) {
pass_config->disable<ngraph::pass::ConvertTensorIteratorToRNNSequence>(); pass_config->disable<ngraph::pass::ConvertTensorIteratorToRNNSequence>();
@ -388,11 +390,12 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork::LPT"); OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork::LPT");
using namespace ngraph::pass::low_precision; using namespace ngraph::pass::low_precision;
ngraph::pass::Manager manager;
// Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers // Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers
// With this key users can work-around such issues // With this key users can work-around such issues
if (!config.enable_fp16_for_quantized_models) { if (!config.enable_fp16_for_quantized_models) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }}); manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
manager.run_passes(nGraphFunc);
} }
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({ auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({


@ -15,15 +15,15 @@ namespace CLDNNPlugin {
static cldnn::coordinate_transformation_mode GetCoordinateTransformationMode(ngraph::op::v4::Interpolate::CoordinateTransformMode mode) { static cldnn::coordinate_transformation_mode GetCoordinateTransformationMode(ngraph::op::v4::Interpolate::CoordinateTransformMode mode) {
switch (mode) { switch (mode) {
case ngraph::op::v4::Interpolate::CoordinateTransformMode::half_pixel: case ngraph::op::v4::Interpolate::CoordinateTransformMode::HALF_PIXEL:
return cldnn::coordinate_transformation_mode::half_pixel; return cldnn::coordinate_transformation_mode::half_pixel;
case ngraph::op::v4::Interpolate::CoordinateTransformMode::pytorch_half_pixel: case ngraph::op::v4::Interpolate::CoordinateTransformMode::PYTORCH_HALF_PIXEL:
return cldnn::coordinate_transformation_mode::pytorch_half_pixel; return cldnn::coordinate_transformation_mode::pytorch_half_pixel;
case ngraph::op::v4::Interpolate::CoordinateTransformMode::asymmetric: case ngraph::op::v4::Interpolate::CoordinateTransformMode::ASYMMETRIC:
return cldnn::coordinate_transformation_mode::asymmetric; return cldnn::coordinate_transformation_mode::asymmetric;
case ngraph::op::v4::Interpolate::CoordinateTransformMode::tf_half_pixel_for_nn: case ngraph::op::v4::Interpolate::CoordinateTransformMode::TF_HALF_PIXEL_FOR_NN:
return cldnn::coordinate_transformation_mode::tf_half_pixel_for_nn; return cldnn::coordinate_transformation_mode::tf_half_pixel_for_nn;
case ngraph::op::v4::Interpolate::CoordinateTransformMode::align_corners: case ngraph::op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS:
return cldnn::coordinate_transformation_mode::align_corners; return cldnn::coordinate_transformation_mode::align_corners;
} }
@ -32,15 +32,15 @@ static cldnn::coordinate_transformation_mode GetCoordinateTransformationMode(ngr
static cldnn::nearest_mode GetNearestMode(ngraph::op::v4::Interpolate::NearestMode mode) { static cldnn::nearest_mode GetNearestMode(ngraph::op::v4::Interpolate::NearestMode mode) {
switch (mode) { switch (mode) {
case ngraph::op::v4::Interpolate::NearestMode::round_prefer_floor: case ngraph::op::v4::Interpolate::NearestMode::ROUND_PREFER_FLOOR:
return cldnn::nearest_mode::round_prefer_floor; return cldnn::nearest_mode::round_prefer_floor;
case ngraph::op::v4::Interpolate::NearestMode::round_prefer_ceil: case ngraph::op::v4::Interpolate::NearestMode::ROUND_PREFER_CEIL:
return cldnn::nearest_mode::round_prefer_ceil; return cldnn::nearest_mode::round_prefer_ceil;
case ngraph::op::v4::Interpolate::NearestMode::floor: case ngraph::op::v4::Interpolate::NearestMode::FLOOR:
return cldnn::nearest_mode::floor; return cldnn::nearest_mode::floor;
case ngraph::op::v4::Interpolate::NearestMode::ceil: case ngraph::op::v4::Interpolate::NearestMode::CEIL:
return cldnn::nearest_mode::ceil; return cldnn::nearest_mode::ceil;
case ngraph::op::v4::Interpolate::NearestMode::simple: case ngraph::op::v4::Interpolate::NearestMode::SIMPLE:
return cldnn::nearest_mode::simple; return cldnn::nearest_mode::simple;
} }
@ -49,18 +49,18 @@ static cldnn::nearest_mode GetNearestMode(ngraph::op::v4::Interpolate::NearestMo
static cldnn::shape_calculation_mode GetShapeCalculationMode(ngraph::op::v4::Interpolate::ShapeCalcMode mode) { static cldnn::shape_calculation_mode GetShapeCalculationMode(ngraph::op::v4::Interpolate::ShapeCalcMode mode) {
switch (mode) { switch (mode) {
case ngraph::op::v4::Interpolate::ShapeCalcMode::sizes: return cldnn::shape_calculation_mode::sizes; case ngraph::op::v4::Interpolate::ShapeCalcMode::SIZES: return cldnn::shape_calculation_mode::sizes;
case ngraph::op::v4::Interpolate::ShapeCalcMode::scales: return cldnn::shape_calculation_mode::scales; case ngraph::op::v4::Interpolate::ShapeCalcMode::SCALES: return cldnn::shape_calculation_mode::scales;
} }
IE_THROW() << "Unknown shape calculation mode: " << static_cast<int>(mode); IE_THROW() << "Unknown shape calculation mode: " << static_cast<int>(mode);
} }
static cldnn::resample_type GetResampleType(ngraph::op::v4::Interpolate::InterpolateMode mode) { static cldnn::resample_type GetResampleType(ngraph::op::v4::Interpolate::InterpolateMode mode) {
switch (mode) { switch (mode) {
case ngraph::op::v4::Interpolate::InterpolateMode::nearest: return cldnn::resample_type::nearest; case ngraph::op::v4::Interpolate::InterpolateMode::NEAREST: return cldnn::resample_type::nearest;
case ngraph::op::v4::Interpolate::InterpolateMode::linear: return cldnn::resample_type::caffe_bilinear; case ngraph::op::v4::Interpolate::InterpolateMode::LINEAR: return cldnn::resample_type::caffe_bilinear;
case ngraph::op::v4::Interpolate::InterpolateMode::linear_onnx: return cldnn::resample_type::linear_onnx; case ngraph::op::v4::Interpolate::InterpolateMode::LINEAR_ONNX: return cldnn::resample_type::linear_onnx;
case ngraph::op::v4::Interpolate::InterpolateMode::cubic: return cldnn::resample_type::cubic; case ngraph::op::v4::Interpolate::InterpolateMode::CUBIC: return cldnn::resample_type::cubic;
} }
IE_THROW() << "Unknown interpolation mode: " << static_cast<int>(mode); IE_THROW() << "Unknown interpolation mode: " << static_cast<int>(mode);
} }


@ -7,6 +7,7 @@
#include "dnn_types.h" #include "dnn_types.h"
#include <cstdint> #include <cstdint>
#include <cpp/ie_cnn_network.h> #include <cpp/ie_cnn_network.h>
#include <ie_algorithm.hpp>
namespace GNAPluginNS { namespace GNAPluginNS {
namespace GNALimitations { namespace GNALimitations {
@ -114,5 +115,10 @@ public:
bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage); bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
inline size_t GetMinBatchToFitInBuffer(InferenceEngine::DataPtr input) {
auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims()));
return total_size / bufferMaxSize + 1;
}
} // namespace GNALimitations } // namespace GNALimitations
} // namespace GNAPluginNS } // namespace GNAPluginNS


@ -19,6 +19,7 @@
#include "gna_slope_scale.h" #include "gna_slope_scale.h"
#include "runtime/pwl.h" #include "runtime/pwl.h"
#include "gna_data_types.hpp" #include "gna_data_types.hpp"
#include "round_float_define.hpp"
namespace GNAPluginNS { namespace GNAPluginNS {
namespace frontend { namespace frontend {
@ -41,8 +42,8 @@ struct ScaleFactorUpdateResult {
* @param p2 Second float value * @param p2 Second float value
* @return Returns true if two float values are equal * @return Returns true if two float values are equal
*/ */
static bool fp32eq(float p1, float p2) { static bool fp32eq(float p1, float p2, float accuracy = 0.00001f) {
return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2))); return (std::abs(p1 - p2) <= accuracy * std::min(std::abs(p1), std::abs(p2)));
} }
/** /**
@ -73,14 +74,14 @@ static float selectBestOutputScaleFactors(float inScale, std::vector<float> outS
auto sd = 0.0; auto sd = 0.0;
for (size_t j = 0; j < slopes.size(); ++j) { for (size_t j = 0; j < slopes.size(); ++j) {
auto s = gna_slope(slopes[j], inScale, outScale); auto s = gna_slope(slopes[j], inScale, outScale);
auto slope = static_cast<uint32_t>(s.slope * s.slope_scale); auto slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
if (slope < static_cast<uint32_t>(std::numeric_limits<int16_t>::min()) && slope > static_cast<uint32_t>(std::numeric_limits<int16_t>::max())) { if (slope < std::numeric_limits<int16_t>::min() || slope > std::numeric_limits<int16_t>::max()) {
sd += std::numeric_limits<int8_t>::max(); sd += std::numeric_limits<int8_t>::max();
continue; continue;
} }
auto testSlope = static_cast<double>(slope) / s.slope_scale * inScale / outScale; auto testSlope = static_cast<double>(slope) / s.slope_scale * inScale / outScale;
if (fp32eq(testSlope, slopes[j])) { if (fp32eq(testSlope, slopes[j], 1.0E-6)) {
return outScale; return outScale;
} }


@ -683,7 +683,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto input = layer->insData[0].lock(); auto input = layer->insData[0].lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(input, 8)->getDims(); auto reshaped_dims = Get2DReshapedData(input, GNALimitations::GetMinBatchToFitInBuffer(input), 8)->getDims();
const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ? const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor; GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
@ -908,7 +908,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(inputs, 8)->getDims(); auto reshaped_dims = Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims();
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0]; uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in; uint32_t num_rows_out = num_rows_in;
@ -1410,7 +1410,8 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
noOfInputsDivisor = GNALimitations::noOfInputsLowPrecDivisor; noOfInputsDivisor = GNALimitations::noOfInputsLowPrecDivisor;
} }
auto input_data = HasTo2DReshapeData(layer) ? Get2DReshapedData(inputs, 8) : inputs; auto input_data = HasTo2DReshapeData(layer) ?
Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8) : inputs;
auto in_dims = input_data->getDims(); auto in_dims = input_data->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front(); auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size; uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size;
@ -2212,8 +2213,8 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
nextMemoryLayer.reserved_size = ALIGN64(memorySize); nextMemoryLayer.reserved_size = ALIGN64(memorySize);
} else { } else {
IE_ASSERT(nextMemoryLayer.reserved_size >= ALIGN64(num_data_bytes_out)); // We may need to extend memory buffer if connected input size is bigger, for example for concat connection
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer)); gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
} }
return; return;
} }
@ -2498,8 +2499,8 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
memoryLayer.reserved_size = ALIGN64(memorySize); memoryLayer.reserved_size = ALIGN64(memorySize);
} else { } else {
IE_ASSERT(memoryLayer.reserved_size >= ALIGN64(num_data_bytes_in)); // We may need to extend memory buffer if connected input size is bigger, for example for concat connection
gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset); gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
} }
return prevLayer; return prevLayer;
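
Both bind_ptr changes in this file replace a hard assertion on the pre-reserved size with a request that carries the consumer's size, so the memory layer's buffer can be extended when a larger consumer such as a concat input is attached. A toy sketch of that policy, using a made-up BoundRegion type rather than the plugin's allocator:

#include <algorithm>
#include <cstddef>

// Toy illustration only: record the largest requested size instead of asserting
// that the initially reserved size is already big enough.
struct BoundRegion {
    std::size_t reservedBytes = 0;
    void bind(std::size_t requestedBytes) {
        reservedBytes = std::max(reservedBytes, requestedBytes);  // grow on demand
    }
};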


@ -15,7 +15,9 @@ namespace GNAPluginNS {
* @param input a pointer to data to be reshaped * @param input a pointer to data to be reshaped
* @param maxZeroDimSize the maximum size of zero dimension * @param maxZeroDimSize the maximum size of zero dimension
*/ */
inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input, size_t maxZeroDimSize) { inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input, size_t minZeroDimSize,
size_t maxZeroDimSize) {
IE_ASSERT(minZeroDimSize > 0);
auto dims = input->getDims(); auto dims = input->getDims();
uint32_t numRowsIn = InferenceEngine::details::product(begin(dims), end(dims)); uint32_t numRowsIn = InferenceEngine::details::product(begin(dims), end(dims));
uint32_t numColumnsIn = 1; uint32_t numColumnsIn = 1;
@ -23,7 +25,7 @@ inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input
if (numRowsIn % 8 == 0) { if (numRowsIn % 8 == 0) {
if (dims.size() >= 2 || dims[0] >= maxZeroDimSize) { if (dims.size() >= 2 || dims[0] >= maxZeroDimSize) {
size_t indexDivide = maxZeroDimSize; size_t indexDivide = maxZeroDimSize;
while (indexDivide > 1) { while (indexDivide > minZeroDimSize) {
if ((numRowsIn / 8) % indexDivide == 0) break; if ((numRowsIn / 8) % indexDivide == 0) break;
--indexDivide; --indexDivide;
} }
@ -55,4 +57,5 @@ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
// Don't reshape diagonal layers with bias connection // Don't reshape diagonal layers with bias connection
return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput(); return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
} }
} // namespace GNAPluginNS } // namespace GNAPluginNS
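
The updated helper searches for the largest batch (zero-dimension) size in the range (minZeroDimSize, maxZeroDimSize] that divides numRowsIn / 8 evenly, instead of always walking down to 1. A standalone re-implementation of just that divisor search, for illustration:

#include <cstdint>

// Illustrative only: pick the largest candidate in (minZeroDimSize, maxZeroDimSize]
// that divides totalElements / 8, falling back to minZeroDimSize otherwise.
static uint32_t pickZeroDimDivisor(uint32_t totalElements,
                                   uint32_t minZeroDimSize,
                                   uint32_t maxZeroDimSize) {
    uint32_t divisor = maxZeroDimSize;
    while (divisor > minZeroDimSize) {
        if ((totalElements / 8) % divisor == 0) break;
        --divisor;
    }
    return divisor;
}
// e.g. pickZeroDimDivisor(512, 1, 8) == 8, pickZeroDimDivisor(168, 1, 8) == 7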


@ -54,6 +54,7 @@
#include <transformations/common_optimizations/pull_transpose_through_fq.hpp> #include <transformations/common_optimizations/pull_transpose_through_fq.hpp>
#include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp> #include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp> #include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/transpose_sinking.hpp>
#include <transformations/utils/utils.hpp> #include <transformations/utils/utils.hpp>
#include "transformations/remove_extra_reshapes.hpp" #include "transformations/remove_extra_reshapes.hpp"
@ -703,9 +704,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
manager.register_pass<SplitConvolutionWithBias>(); manager.register_pass<SplitConvolutionWithBias>();
manager.register_pass<SplitConvolution>(); manager.register_pass<SplitConvolution>();
manager.register_pass<HandleTransposesAroundMatMul>(); manager.register_pass<HandleTransposesAroundMatMul>();
manager.register_pass<SwapInputMatMul>();
manager.register_pass<SwapInputMatMulWithBias>();
manager.register_pass<SwapInputMatMulWithFq>(); manager.register_pass<SwapInputMatMulWithFq>();
manager.register_pass<SwapInputMatMulWithBias>();
manager.register_pass<SwapInputMatMul>();
manager.register_pass<InsertTransposeAfterConvOrPool>(); manager.register_pass<InsertTransposeAfterConvOrPool>();
manager.register_pass<ReorderActivationAndPooling>(); manager.register_pass<ReorderActivationAndPooling>();
manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>(); manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
@ -727,6 +728,8 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
pass_config->disable<ngraph::pass::ReluFakeQuantizeFusion>(); pass_config->disable<ngraph::pass::ReluFakeQuantizeFusion>();
// Consider to enable after per-channel quantization on FakeQuantize layer is supported in GNAPlugin, see issue 52034 // Consider to enable after per-channel quantization on FakeQuantize layer is supported in GNAPlugin, see issue 52034
pass_config->disable<ngraph::pass::AddFakeQuantizeFusion>(); pass_config->disable<ngraph::pass::AddFakeQuantizeFusion>();
// TransposeReduction can be enabled when Transpose-Conv-Transpose patterns will be handled in ngraph transformations
pass_config->disable<ngraph::pass::TransposeReduction>();
manager.run_passes(graph); manager.run_passes(graph);
convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(graph, clonedNetwork); convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(graph, clonedNetwork);
} }
@ -1576,6 +1579,18 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
transpose_inputs_info, transpose_inputs_info,
transpose_outputs_info); transpose_outputs_info);
// If scale factors are defined in configuration we still need to use them instead of imported values,
// for example to change the scale factors for the old models.
if (!config.inputScaleFactors.empty()) {
IE_ASSERT(config.inputScaleFactors.size() == inputsDesc->inputScaleFactors.size());
for (size_t i = 0; i < config.inputScaleFactors.size(); ++i) {
if (config.inputScaleFactors[i] != GNAPluginNS::kScaleFactorDefault) {
gnalog() << "[Import Network] Using input scale factor defined in configuration for input " << i << std::endl;
inputsDesc->inputScaleFactors[i] = config.inputScaleFactors[i];
}
}
}
#if GNA_LIB_VER == 2 #if GNA_LIB_VER == 2
auto getOrientation = [](Gna2Operation & gnaOperation) { auto getOrientation = [](Gna2Operation & gnaOperation) {
return gnaOperation.Type == Gna2OperationTypeConvolution ? return gnaOperation.Type == Gna2OperationTypeConvolution ?
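
The added block lets scale factors from the configuration win over the ones stored in an imported model, with kScaleFactorDefault marking "not set". A minimal standalone sketch of that override rule, assuming the default is 1.0f as in the Config change further below:

#include <algorithm>
#include <cstddef>
#include <vector>

// Minimal sketch: values still equal to the default are treated as "not configured"
// and the imported scale factor is kept; anything else overrides it.
static void overrideImportedScaleFactors(std::vector<float>& imported,
                                         const std::vector<float>& configured,
                                         float notSetDefault = 1.0f) {
    const std::size_t n = std::min(imported.size(), configured.size());
    for (std::size_t i = 0; i < n; ++i) {
        if (configured[i] != notSetDefault) {
            imported[i] = configured[i];
        }
    }
}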


@ -95,7 +95,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
} }
// missing scale factors are set to be 1.0f // missing scale factors are set to be 1.0f
if (inputScaleFactors.size() <= input_index) { if (inputScaleFactors.size() <= input_index) {
inputScaleFactors.resize(input_index + 1, 1.f); inputScaleFactors.resize(input_index + 1, GNAPluginNS::kScaleFactorDefault);
} }
inputScaleFactors[input_index] = InferenceEngine::CNNLayer::ie_parse_float(value); inputScaleFactors[input_index] = InferenceEngine::CNNLayer::ie_parse_float(value);
} else if (key == GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE)) { } else if (key == GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE)) {


@ -18,6 +18,8 @@
namespace GNAPluginNS { namespace GNAPluginNS {
static const float kScaleFactorDefault = 1.f;
struct Config { struct Config {
Config() { Config() {
AdjustKeyMapValues(); AdjustKeyMapValues();


@ -45,4 +45,18 @@ public:
}; };
std::vector<SplitConnectedLayerInfo> splitOutputLayers; std::vector<SplitConnectedLayerInfo> splitOutputLayers;
}; };
// @brief Returns the sizes of split outputs so that the input tensor is split into aligned parts not greater than the specified size
static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = 64) {
std::vector<uint32_t> splitSizes;
uint32_t maxAlignedSplitSize = maxSplitSize - maxSplitSize % alignment;
uint32_t usedSize = 0;
while (usedSize < totalSize) {
uint32_t partSize = std::min(totalSize - usedSize, maxAlignedSplitSize);
splitSizes.push_back(partSize);
usedSize += partSize;
}
return splitSizes;
}
} // namespace GNAPluginNS } // namespace GNAPluginNS
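
Running the same arithmetic standalone shows what GetAlignedSplitSizes produces; the sizes below are hypothetical and the loop simply mirrors the helper rather than calling it:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    const uint32_t totalSize = 70000;      // hypothetical tensor size
    const uint32_t maxSplitSize = 65536;   // hypothetical buffer limit
    const uint32_t alignment = 64;

    std::vector<uint32_t> splitSizes;
    const uint32_t maxAligned = maxSplitSize - maxSplitSize % alignment;  // 65536
    uint32_t used = 0;
    while (used < totalSize) {
        const uint32_t part = std::min(totalSize - used, maxAligned);
        splitSizes.push_back(part);
        used += part;
    }
    for (auto s : splitSizes) std::cout << s << ' ';  // prints: 65536 4464
    std::cout << std::endl;
    return 0;
}

Only the last chunk may end up unaligned; every earlier chunk is a multiple of the alignment.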


@ -87,7 +87,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
}); });
IE_ASSERT(inputLayer != nullptr); IE_ASSERT(inputLayer != nullptr);
size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() : size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() :
Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1]; Get2DReshapedData(nextLayer->outData[0], GNALimitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)->getDims()[1];
std::vector<float> weightsValues(weightsSize, fillValue); std::vector<float> weightsValues(weightsSize, fillValue);
IE_ASSERT(diagLayer != nullptr); IE_ASSERT(diagLayer != nullptr);
diagLayer->_weights = make_shared_blob<float>( diagLayer->_weights = make_shared_blob<float>(
@ -1113,6 +1113,9 @@ void InsertConcatAligningFilterPass::run() {
SizeVector({filterWeights.size()}), SizeVector({filterWeights.size()}),
Layout::C)); Layout::C));
concatAligningFilter->_weights->allocate(); concatAligningFilter->_weights->allocate();
if (!concatAligningFilter->_weights->buffer().as<float*>()) {
THROW_GNA_EXCEPTION << "Failed to allocate weights of size " << filterWeights.size() << " for " << filterName;
}
CopyVectorToBlob(concatAligningFilter->_weights, filterWeights); CopyVectorToBlob(concatAligningFilter->_weights, filterWeights);
@ -1395,15 +1398,20 @@ void EltwiseSplitOverChannelsPass::run() {
THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1"; THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1";
} }
auto oData = l->outData.front(); auto oData = l->outData.front();
auto out_width = GetDataDimSize(oData, DataDimName::W); auto oDims = oData->getDims();
auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end()); auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims));
// gna limit this to be OxFFFF if (totalElementsSize <= GNALimitations::bufferMaxSize) {
auto maxAffineElements = 65536 - 64;
if (totalElementsForOutput <= maxAffineElements) {
continue; continue;
} }
auto totalSplits = 1 + totalElementsForOutput / maxAffineElements; auto firstValuableDim = std::find_if(std::begin(oDims), std::end(oDims), [](size_t val) { return val > 1; });
IE_ASSERT(firstValuableDim != std::end(oDims));
auto splittedElementsSize = *firstValuableDim;
auto splittedDimIx = std::distance(std::begin(oDims), firstValuableDim);
// Split output size should be a multiple of 64 to avoid align filters insertion
auto splitSizes = GetAlignedSplitSizes(splittedElementsSize,
GNALimitations::bufferMaxSize * splittedElementsSize / totalElementsSize);
pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n"; pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n";
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l); auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l);
@ -1421,27 +1429,13 @@ void EltwiseSplitOverChannelsPass::run() {
auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc(); auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc();
// create split layer outputs // create split layer outputs
size_t usedElements = 0; for (auto elementsNum : splitSizes) {
for (size_t i = 0; i < totalSplits; i++) { auto newDims = oDims;
SizeVector newDims; newDims[splittedDimIx] = elementsNum;
size_t elements_num = std::min(totalElementsForOutput - usedElements,
static_cast<size_t>(maxAffineElements));
if (inputDesc.getDims().size() == 2) {
newDims = SizeVector{1, elements_num};
} else {
elements_num = elements_num - elements_num % out_width;
newDims = SizeVector{1, elements_num / out_width, out_width};
}
auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout()); auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout());
auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc); auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc);
getCreatorLayer(data) = split; getCreatorLayer(data) = split;
split->outData.push_back(data); split->outData.push_back(data);
usedElements += elements_num;
if (usedElements == totalElementsForOutput) {
break;
}
} }
// replacing connection X->eltwise to X->split // replacing connection X->eltwise to X->split
auto oData = CNNLayerFindOutData(l, kThEltwiseInput); auto oData = CNNLayerFindOutData(l, kThEltwiseInput);
@ -1461,7 +1455,7 @@ void EltwiseSplitOverChannelsPass::run() {
concat->outData.push_back(masterEltwise->outData.front()); concat->outData.push_back(masterEltwise->outData.front());
getCreatorLayer(masterEltwise->outData.front()) = concat; getCreatorLayer(masterEltwise->outData.front()) = concat;
for (size_t k = 0; k != totalSplits; k++) { for (size_t k = 0; k != splitSizes.size(); k++) {
auto eltwiseRaw = std::make_shared<EltwiseLayer>( auto eltwiseRaw = std::make_shared<EltwiseLayer>(
LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32}); LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32});
IE_ASSERT(eltwiseRaw != nullptr); IE_ASSERT(eltwiseRaw != nullptr);
@ -1521,7 +1515,9 @@ void SubstituteScaleShiftBroadCastPass::run() {
if (was_reshaped) { if (was_reshaped) {
dataDims = reshaped_data[insData->getName()]; dataDims = reshaped_data[insData->getName()];
} else { } else {
dataDims = HasTo2DReshapeData(l) ? Get2DReshapedData(insData, 8)->getDims() : insData->getDims(); dataDims = HasTo2DReshapeData(l) ?
Get2DReshapedData(insData, GNALimitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() :
insData->getDims();
} }
if (dataDims.size() <= 2) { if (dataDims.size() <= 2) {
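
For the eltwise split above, the split axis is the first output dimension greater than 1, and its element budget is the buffer limit scaled by that dimension's share of the whole tensor. A standalone walk-through with assumed dimensions and an assumed buffer limit:

#include <algorithm>
#include <cstdint>
#include <vector>

int main() {
    const std::vector<uint32_t> oDims = {1, 1024, 128};  // assumed eltwise output dims
    const uint32_t bufferMaxSize = 65528;                // assumed GNA buffer limit
    const uint32_t total = 1 * 1024 * 128;               // 131072 elements

    // first dimension greater than 1 becomes the split axis
    const auto firstValuable = std::find_if(oDims.begin(), oDims.end(),
                                            [](uint32_t v) { return v > 1; });
    const uint32_t splitDimSize = *firstValuable;        // 1024

    // budget for that single dimension: scale the buffer limit by its share
    const uint32_t perDimBudget = bufferMaxSize * splitDimSize / total;  // 511

    // GetAlignedSplitSizes(1024, 511) would then yield {448, 448, 128}
    (void)perDimBudget;
    return 0;
}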


@ -12,6 +12,7 @@
#include <ngraph/pattern/op/wrap_type.hpp> #include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp> #include <ngraph/rt_info.hpp>
#include "backend/gna_limitations.hpp" #include "backend/gna_limitations.hpp"
#include "layers/gna_split_layer.hpp"
using namespace GNAPluginNS; using namespace GNAPluginNS;
@ -19,22 +20,6 @@ NGRAPH_RTTI_DEFINITION(SplitConvolution, "SplitConvolution", 0);
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0); NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0);
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0); NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0);
static std::vector<int64_t> GetConvSplitSizes(std::shared_ptr<ngraph::Node> conv) {
uint32_t width = conv->get_input_shape(0).back();
uint32_t in_channels = conv->get_input_shape(0).at(1);
uint32_t usedWidth = 0;
std::vector<int64_t> split_sizes;
uint32_t width_max_size = GNALimitations::bufferMaxSize / in_channels;
width_max_size = width_max_size - width_max_size % 64;
while (usedWidth < width) {
uint32_t width_part = std::min(width - usedWidth, width_max_size);
split_sizes.push_back(width_part);
usedWidth += width_part;
}
IE_ASSERT(usedWidth == width);
return split_sizes;
}
static bool Convert(std::shared_ptr<ngraph::Node> conv, static bool Convert(std::shared_ptr<ngraph::Node> conv,
std::shared_ptr<ngraph::Node> add, std::shared_ptr<ngraph::Node> add,
std::shared_ptr<ngraph::Node> bias, std::shared_ptr<ngraph::Node> bias,
@ -45,15 +30,21 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
return false; return false;
} }
auto split_sizes = GetConvSplitSizes(conv); uint32_t width = conv->get_input_shape(0).back();
uint32_t in_channels = conv->get_input_shape(0).at(1);
auto split_sizes = GetAlignedSplitSizes(width, GNALimitations::bufferMaxSize / in_channels);
IE_ASSERT(split_sizes.size() > 1); IE_ASSERT(split_sizes.size() > 1);
std::vector<int64_t> split_sizes_casted(split_sizes.size());
std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) {
return static_cast<int64_t>(size);
});
/* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1, /* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1,
otherwise this split axis isn't supported */ otherwise this split axis isn't supported */
const int64_t width_axis = conv->get_input_shape(0).size() - 1; const int64_t width_axis = conv->get_input_shape(0).size() - 1;
auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(conv->input_value(0), auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(conv->input_value(0),
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}), ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}),
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes.size()}), split_sizes)); ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes_casted.size()}), split_sizes_casted));
ngraph::copy_runtime_info(conv, split_node); ngraph::copy_runtime_info(conv, split_node);
split_node->set_friendly_name(conv->get_friendly_name() + "/split"); split_node->set_friendly_name(conv->get_friendly_name() + "/split");
ngraph::OutputVector convOutputs; ngraph::OutputVector convOutputs;


@ -41,23 +41,6 @@ namespace VPUConfigParams {
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::MYRIAD_ENABLE_FORCE_RESET instead") INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::MYRIAD_ENABLE_FORCE_RESET instead")
DECLARE_VPU_MYRIAD_CONFIG_KEY(FORCE_RESET); DECLARE_VPU_MYRIAD_CONFIG_KEY(FORCE_RESET);
/**
* @deprecated
* @brief This option allows to specify device.
* If specified device is not available then creating infer request will throw an exception.
*/
INFERENCE_ENGINE_DEPRECATED("")
DECLARE_VPU_MYRIAD_CONFIG_KEY(PLATFORM);
/**
* @deprecated
* @brief Supported keys definition for VPU_MYRIAD_CONFIG_KEY(PLATFORM) option.
*/
INFERENCE_ENGINE_DEPRECATED("")
DECLARE_VPU_MYRIAD_CONFIG_VALUE(2450);
INFERENCE_ENGINE_DEPRECATED("")
DECLARE_VPU_MYRIAD_CONFIG_VALUE(2480);
/** /**
* @deprecated Use InferenceEngine::MYRIAD_DDR_TYPE instead * @deprecated Use InferenceEngine::MYRIAD_DDR_TYPE instead
* @brief This option allows to specify device memory type. * @brief This option allows to specify device memory type.


@ -19,10 +19,6 @@
#include "ie_plugin_config.hpp" #include "ie_plugin_config.hpp"
#include "ie_version.hpp" #include "ie_version.hpp"
namespace ngraph {
class Function;
} // namespace ngraph
namespace InferenceEngine { namespace InferenceEngine {
class IExtension; class IExtension;
class Blob; class Blob;
@ -30,6 +26,9 @@ class RemoteContext;
} // namespace InferenceEngine } // namespace InferenceEngine
namespace ov { namespace ov {
class Function;
namespace runtime { namespace runtime {
/** /**
@ -72,7 +71,7 @@ public:
* * binPath parameter is not used. * * binPath parameter is not used.
* @return Function * @return Function
*/ */
std::shared_ptr<ngraph::Function> read_model(const std::wstring& modelPath, const std::wstring& binPath = {}) const; std::shared_ptr<ov::Function> read_model(const std::wstring& modelPath, const std::wstring& binPath = {}) const;
#endif #endif
/** /**
@ -86,7 +85,7 @@ public:
* * binPath parameter is not used. * * binPath parameter is not used.
* @return Function * @return Function
*/ */
std::shared_ptr<ngraph::Function> read_model(const std::string& modelPath, const std::string& binPath = {}) const; std::shared_ptr<ov::Function> read_model(const std::string& modelPath, const std::string& binPath = {}) const;
/** /**
* @brief Reads models from IR and ONNX formats * @brief Reads models from IR and ONNX formats
* @param model string with model in IR or ONNX format * @param model string with model in IR or ONNX format
@ -101,7 +100,7 @@ public:
* constant data becomes to point to invalid memory. * constant data becomes to point to invalid memory.
* @return Function * @return Function
*/ */
std::shared_ptr<ngraph::Function> read_model(const std::string& model, std::shared_ptr<ov::Function> read_model(const std::string& model,
const std::shared_ptr<const InferenceEngine::Blob>& weights) const; const std::shared_ptr<const InferenceEngine::Blob>& weights) const;
/** /**
@ -116,7 +115,7 @@ public:
* operation * operation
* @return An executable network reference * @return An executable network reference
*/ */
InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ngraph::Function>& network, InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ov::Function>& network,
const std::string& deviceName, const std::string& deviceName,
const std::map<std::string, std::string>& config = {}); const std::map<std::string, std::string>& config = {});
@ -145,7 +144,7 @@ public:
* operation * operation
* @return An executable network object * @return An executable network object
*/ */
InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ngraph::Function>& network, InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ov::Function>& network,
const std::shared_ptr<InferenceEngine::RemoteContext>& context, const std::shared_ptr<InferenceEngine::RemoteContext>& context,
const std::map<std::string, std::string>& config = {}); const std::map<std::string, std::string>& config = {});
@ -189,7 +188,7 @@ public:
* @param config Optional map of pairs: (config parameter name, config parameter value) * @param config Optional map of pairs: (config parameter name, config parameter value)
* @return An object containing a map of pairs a layer name -> a device name supporting this layer. * @return An object containing a map of pairs a layer name -> a device name supporting this layer.
*/ */
InferenceEngine::QueryNetworkResult query_model(const std::shared_ptr<const ngraph::Function>& network, InferenceEngine::QueryNetworkResult query_model(const std::shared_ptr<const ov::Function>& network,
const std::string& deviceName, const std::string& deviceName,
const std::map<std::string, std::string>& config = {}) const; const std::map<std::string, std::string>& config = {}) const;
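
A hypothetical usage sketch of the updated signatures; the enclosing class name and header path are not visible in this hunk and are assumed to be ov::runtime::Core and openvino/runtime/core.hpp, and the model path and device name are placeholders:

#include <memory>
#include <string>
// assumed header for the class these declarations belong to
#include "openvino/runtime/core.hpp"

int main() {
    ov::runtime::Core core;                                     // assumed class name
    std::shared_ptr<ov::Function> model = core.read_model("model.xml");       // IR or ONNX
    InferenceEngine::ExecutableNetwork compiled = core.compile_model(model, "CPU");
    (void)compiled;
    return 0;
}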


@ -62,18 +62,17 @@ Parsed<T> parseDeviceNameIntoConfig(const std::string& deviceName, const std::ma
} else if (deviceName_.find("MULTI:") == 0) { } else if (deviceName_.find("MULTI:") == 0) {
deviceName_ = "MULTI"; deviceName_ = "MULTI";
config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6); config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = deviceName.substr(6);
} else if (deviceName_.find("AUTO") == 0) { } else if (deviceName.find("AUTO") == 0) {
deviceName_ = "AUTO"; deviceName_ = "MULTI";
if (deviceName.size() > std::string("AUTO").size()) { if (deviceName.find("AUTO:") == 0) {
std::string deviceList = deviceName.substr(std::string("AUTO:").size()); config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] =
if (deviceList.find("AUTO") != std::string::npos) { deviceName.substr(std::string("AUTO:").size());
IE_THROW() << "Device list for AUTO should not be AUTO";
}
config_[InferenceEngine::KEY_AUTO_DEVICE_LIST] = deviceName.substr(std::string("AUTO:").size());
} }
config_.insert({CONFIG_KEY_INTERNAL(WORK_MODE), ""});
} else { } else {
if (deviceName_.empty()) { if (deviceName_ == "AUTO") {
deviceName_ = "AUTO"; deviceName_ = "MULTI";
config_.insert({CONFIG_KEY_INTERNAL(WORK_MODE), ""});
} }
InferenceEngine::DeviceIDParser parser(deviceName_); InferenceEngine::DeviceIDParser parser(deviceName_);
deviceName_ = parser.getDeviceName(); deviceName_ = parser.getDeviceName();
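
A standalone re-implementation of just the renaming rule above; the real code also inserts the internal WORK_MODE key and uses InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, so the plain string key below is only a placeholder:

#include <map>
#include <string>
#include <utility>

// Sketch only: "AUTO" and "AUTO:<devices>" are redirected to the MULTI plugin,
// and everything after the colon becomes the MULTI device priority list.
static std::pair<std::string, std::map<std::string, std::string>>
resolveAutoAlias(const std::string& deviceName) {
    std::map<std::string, std::string> config;
    if (deviceName.rfind("AUTO", 0) == 0) {                 // starts with "AUTO"
        if (deviceName.rfind("AUTO:", 0) == 0) {
            config["MULTI_DEVICE_PRIORITIES"] = deviceName.substr(5);  // e.g. "CPU,GPU"
        }
        return {"MULTI", std::move(config)};
    }
    return {deviceName, std::move(config)};
}
// resolveAutoAlias("AUTO:CPU,GPU") -> {"MULTI", {{"MULTI_DEVICE_PRIORITIES", "CPU,GPU"}}}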
@ -579,7 +578,21 @@ public:
} }
} }
auto parsed = parseDeviceNameIntoConfig(deviceName); // AUTO case
{
if (deviceName.find("AUTO:") == 0) {
IE_THROW()
<< "You can get specific metrics with the GetMetric only for the MULTI itself (without devices). "
"To get individual devices's metrics call GetMetric for each device separately";
}
}
std::string pluginName = deviceName;
if (pluginName == "AUTO") {
pluginName = "MULTI";
}
auto parsed = parseDeviceNameIntoConfig(pluginName);
// we need to return a copy of Parameter object which is created on Core side, // we need to return a copy of Parameter object which is created on Core side,
// not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread // not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread
@ -629,11 +642,14 @@ public:
* @param deviceName A name of device * @param deviceName A name of device
* @return Reference to a CPP plugin wrapper * @return Reference to a CPP plugin wrapper
*/ */
InferenceEngine::InferencePlugin GetCPPPluginByName(const std::string& deviceName) const { InferenceEngine::InferencePlugin GetCPPPluginByName(const std::string& pluginName) const {
OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "CoreImpl::GetCPPPluginByName"); OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "CoreImpl::GetCPPPluginByName");
std::lock_guard<std::mutex> lock(pluginsMutex); std::lock_guard<std::mutex> lock(pluginsMutex);
auto deviceName = pluginName;
if (deviceName == "AUTO") {
deviceName = "MULTI";
}
auto it = pluginRegistry.find(deviceName); auto it = pluginRegistry.find(deviceName);
if (it == pluginRegistry.end()) { if (it == pluginRegistry.end()) {
IE_THROW() << "Device with \"" << deviceName << "\" name is not registered in the InferenceEngine"; IE_THROW() << "Device with \"" << deviceName << "\" name is not registered in the InferenceEngine";
@ -856,9 +872,9 @@ public:
} else if (deviceName.find("AUTO") == 0) { } else if (deviceName.find("AUTO") == 0) {
auto pos = deviceName.find_first_of(":"); auto pos = deviceName.find_first_of(":");
if (pos != std::string::npos) { if (pos != std::string::npos) {
deviceNames = InferenceEngine::DeviceIDParser::getHeteroDevices(deviceName.substr(pos + 1)); deviceNames = InferenceEngine::DeviceIDParser::getMultiDevices(deviceName.substr(pos + 1));
} }
deviceNames.emplace_back("AUTO"); deviceNames.emplace_back("MULTI");
} else { } else {
deviceNames.push_back(deviceName); deviceNames.push_back(deviceName);
} }


@ -0,0 +1,301 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "threading/ie_tbb_streams_executor.hpp"
#include <atomic>
#include <list>
#include <memory>
#include <queue>
#include <thread>
#include <tuple>
#include <utility>
#include "details/ie_exception.hpp"
#include "ie_parallel.hpp"
#include "ie_parallel_custom_arena.hpp"
#include "ie_system_conf.h"
#include "threading/ie_thread_affinity.hpp"
#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
# include <tbb/concurrent_queue.h>
# include <tbb/enumerable_thread_specific.h>
# include <tbb/global_control.h>
# include <tbb/task_group.h>
# include <tbb/task_scheduler_observer.h>
namespace InferenceEngine {
struct TBBStreamsExecutor::Impl {
struct Stream;
using TaskQueue = tbb::concurrent_queue<Task>;
using StreamQueue = tbb::concurrent_bounded_queue<Stream*>;
using LocalStreams = tbb::enumerable_thread_specific<Stream*>;
struct Shared : public std::enable_shared_from_this<Shared> {
using Ptr = std::shared_ptr<Shared>;
TaskQueue _taskQueue;
StreamQueue _streamQueue;
};
struct Stream {
struct Observer : tbb::task_scheduler_observer {
Stream* _thisStream = nullptr;
LocalStreams* _localStream = nullptr;
CpuSet _mask;
int _ncpus = 0;
int _threadBindingStep = 0;
int _offset = 0;
Observer(custom::task_arena& arena,
Stream* thisStream,
LocalStreams* localStream,
const bool pinToCores,
const int streamId,
const int threadsPerStream,
const int threadBindingStep,
const int threadBindingOffset)
: tbb::task_scheduler_observer{static_cast<tbb::task_arena&>(arena)},
_thisStream{thisStream},
_localStream{localStream},
_threadBindingStep{threadBindingStep},
_offset{streamId * threadsPerStream + threadBindingOffset} {
if (pinToCores) {
std::tie(_mask, _ncpus) = GetProcessMask();
}
}
void on_scheduler_entry(bool) override {
_localStream->local() = _thisStream;
if (nullptr != _mask) {
PinThreadToVacantCore(_offset + tbb::this_task_arena::current_thread_index(),
_threadBindingStep,
_ncpus,
_mask);
}
}
void on_scheduler_exit(bool) override {
_localStream->local() = nullptr;
if (nullptr != _mask) {
PinCurrentThreadByMask(_ncpus, _mask);
}
}
~Observer() override = default;
};
explicit Stream(Impl* impl, const bool externStream = false) : _impl{impl} {
{
std::lock_guard<std::mutex> lock{_impl->_streamIdMutex};
if (_impl->_streamIdQueue.empty()) {
_streamId = _impl->_streamId++;
} else {
_streamId = _impl->_streamIdQueue.front();
_impl->_streamIdQueue.pop();
}
}
_numaNodeId = _impl->_config._streams
? _impl->_usedNumaNodes.at((_streamId % _impl->_config._streams) /
((_impl->_config._streams + _impl->_usedNumaNodes.size() - 1) /
_impl->_usedNumaNodes.size()))
: _impl->_usedNumaNodes.at(_streamId % _impl->_usedNumaNodes.size());
auto concurrency =
(0 == _impl->_config._threadsPerStream) ? tbb::task_arena::automatic : _impl->_config._threadsPerStream;
auto masterThreads = externStream ? 1u : 0u;
if (ThreadBindingType::HYBRID_AWARE == _impl->_config._threadBindingType) {
if (Config::PreferredCoreType::ROUND_ROBIN != _impl->_config._threadPreferredCoreType) {
if (Config::PreferredCoreType::ANY == _impl->_config._threadPreferredCoreType) {
_arena.initialize(concurrency);
} else {
const auto selected_core_type =
Config::PreferredCoreType::BIG == _impl->_config._threadPreferredCoreType
? custom::info::core_types().back() // running on Big cores only
: custom::info::core_types().front(); // running on Little cores only
_arena.initialize(custom::task_arena::constraints{}
.set_core_type(selected_core_type)
.set_max_concurrency(concurrency));
}
} else {
// assigning the stream to the core type in the round-robin fashion
// wrapping around total_streams (i.e. how many streams all different core types can handle
// together)
const auto total_streams = _impl->_totalSreamsOnCoreTypes.back().second;
const auto streamId_wrapped = _streamId % total_streams;
const auto& selected_core_type =
std::find_if(_impl->_totalSreamsOnCoreTypes.cbegin(),
_impl->_totalSreamsOnCoreTypes.cend(),
[streamId_wrapped](const decltype(_impl->_totalSreamsOnCoreTypes)::value_type& p) {
return p.second > streamId_wrapped;
})
->first;
_arena.initialize(custom::task_arena::constraints{}
.set_core_type(selected_core_type)
.set_max_concurrency(concurrency));
}
} else if (ThreadBindingType::NUMA == _impl->_config._threadBindingType) {
_arena.initialize(custom::task_arena::constraints{_numaNodeId, concurrency});
} else {
_arena.initialize(concurrency, masterThreads);
}
_observer.reset(new Observer{_arena,
this,
&(_impl->_localStream),
(ThreadBindingType::CORES == _impl->_config._threadBindingType),
_streamId,
_impl->_config._threadsPerStream,
_impl->_config._threadBindingStep,
_impl->_config._threadBindingOffset});
_observer->observe(true);
}
~Stream() {
static_cast<tbb::task_arena&>(_arena).terminate();
_observer->observe(false);
{
std::lock_guard<std::mutex> lock{_impl->_streamIdMutex};
_impl->_streamIdQueue.push(_streamId);
}
}
Impl* _impl = nullptr;
int _streamId = 0;
int _numaNodeId = 0;
custom::task_arena _arena;
std::unique_ptr<Observer> _observer;
};
using Streams = std::list<Stream>;
using ExternStreams = tbb::enumerable_thread_specific<Stream>;
explicit Impl(const Config& config)
: _config{config},
_shared{std::make_shared<Shared>()},
_localStream{nullptr},
_externStreams{this, true} {
if (_config._streams * _config._threadsPerStream >= static_cast<int>(std::thread::hardware_concurrency())) {
_maxTbbThreads.reset(
new tbb::global_control{tbb::global_control::max_allowed_parallelism,
static_cast<std::size_t>(_config._streams * _config._threadsPerStream + 1)});
}
auto numaNodes = getAvailableNUMANodes();
if (_config._streams != 0) {
std::copy_n(std::begin(numaNodes),
std::min(static_cast<std::size_t>(_config._streams), numaNodes.size()),
std::back_inserter(_usedNumaNodes));
} else {
_usedNumaNodes = numaNodes;
}
if (ThreadBindingType::HYBRID_AWARE == config._threadBindingType) {
const auto core_types = custom::info::core_types();
const int threadsPerStream =
(0 == config._threadsPerStream) ? std::thread::hardware_concurrency() : config._threadsPerStream;
int sum = 0;
// reversed order, so BIG cores are first
for (auto iter = core_types.rbegin(); iter < core_types.rend(); iter++) {
const auto& type = *iter;
// calculating the #streams per core type
const int num_streams_for_core_type =
std::max(1,
custom::info::default_concurrency(custom::task_arena::constraints{}.set_core_type(type)) /
threadsPerStream);
sum += num_streams_for_core_type;
// prefix sum, so the core type for a given stream id will be deduced just as an upper_bound
// (notice that the map keeps the elements in the descending order, so the big cores are populated
// first)
_totalSreamsOnCoreTypes.emplace_back(type, sum);
}
}
_shared->_streamQueue.set_capacity(_config._streams);
for (int streamId = 0; streamId < _config._streams; ++streamId) {
_streams.emplace_back(this);
_shared->_streamQueue.push(&(_streams.back()));
}
}
~Impl() {
for (int streamId = 0; streamId < _config._streams; ++streamId) {
Stream* stream = nullptr;
_shared->_streamQueue.pop(stream);
(void)stream;
}
}
static void Schedule(Shared::Ptr& shared, Task task) {
Stream* stream = nullptr;
if (shared->_streamQueue.try_pop(stream)) {
struct TryPop {
void operator()() const {
try {
do {
Task task = std::move(_task);
task();
} while (_shared->_taskQueue.try_pop(_task));
} catch (...) {
}
if (_shared->_streamQueue.try_push(_stream)) {
if (_shared->_taskQueue.try_pop(_task)) {
Schedule(_shared, std::move(_task));
}
}
}
Stream* _stream;
mutable Shared::Ptr _shared;
mutable Task _task;
};
stream->_arena.enqueue(TryPop{stream, shared->shared_from_this(), std::move(task)});
} else {
shared->_taskQueue.push(std::move(task));
}
}
Config _config;
std::unique_ptr<tbb::global_control> _maxTbbThreads;
std::mutex _streamIdMutex;
int _streamId = 0;
std::queue<int> _streamIdQueue;
std::vector<int> _usedNumaNodes;
Shared::Ptr _shared;
LocalStreams _localStream;
ExternStreams _externStreams;
Streams _streams;
using StreamIdToCoreTypes = std::vector<std::pair<custom::core_type_id, int>>;
StreamIdToCoreTypes _totalSreamsOnCoreTypes;
};
TBBStreamsExecutor::TBBStreamsExecutor(const Config& config) : _impl{new TBBStreamsExecutor::Impl{config}} {}
TBBStreamsExecutor::~TBBStreamsExecutor() {
_impl.reset();
}
int TBBStreamsExecutor::GetStreamId() {
auto stream = _impl->_localStream.local();
if (nullptr == stream) {
stream = &(_impl->_externStreams.local());
}
return stream->_streamId;
}
int TBBStreamsExecutor::GetNumaNodeId() {
auto stream = _impl->_localStream.local();
if (nullptr == stream) {
stream = &(_impl->_externStreams.local());
}
return stream->_numaNodeId;
}
void TBBStreamsExecutor::run(Task task) {
if (_impl->_config._streams == 0) {
Execute(std::move(task));
} else {
Impl::Schedule(_impl->_shared, std::move(task));
}
}
void TBBStreamsExecutor::Execute(Task task) {
auto stream = _impl->_localStream.local();
if (nullptr == stream) {
_impl->_externStreams.local()._arena.execute(std::move(task));
} else {
stream->_arena.execute(std::move(task));
}
}
} // namespace InferenceEngine
#endif // ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
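
A hypothetical usage sketch for the new executor, not taken from the sources: it assumes the constructor's Config is the inherited InferenceEngine::IStreamsExecutor::Config whose _streams and _threadsPerStream fields appear above, and that Task is a std::function<void()>:

#include <atomic>
#include "threading/ie_tbb_streams_executor.hpp"

int main() {
    InferenceEngine::IStreamsExecutor::Config config;  // assumed base Config type
    config._streams = 2;            // two streams backed by the stream queue above
    config._threadsPerStream = 2;   // arena concurrency per stream

    InferenceEngine::TBBStreamsExecutor executor{config};

    std::atomic<int> done{0};
    for (int i = 0; i < 8; ++i) {
        executor.run([&done] { ++done; });  // scheduled on a free stream or queued
    }
    executor.Execute([] { /* runs inline inside a stream arena */ });
    return 0;  // note: this sketch does not wait for queued tasks to finish
}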


@ -43,7 +43,9 @@ ngraph::pass::FullyConnectedBiasFusion::FullyConnectedBiasFusion() {
Shape bias_shape(bias->get_shape()); Shape bias_shape(bias->get_shape());
Shape output_shape(fc->get_shape()); Shape output_shape(fc->get_shape());
size_t bias_size = std::accumulate(bias_shape.begin(), bias_shape.end(), size_t{1}, std::multiplies<int64_t>()); size_t bias_size = std::accumulate(bias_shape.begin(), bias_shape.end(), size_t{1}, std::multiplies<int64_t>());
if (bias_shape.empty() || bias_shape.back() != output_shape.back() || bias_shape.back() != bias_size) { if (bias_shape.empty() ||
(bias_shape.back() != output_shape.back() && bias_shape.back() != 1) ||
bias_shape.back() != bias_size) {
return false; return false;
} }
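
The relaxed condition above accepts a bias that is effectively one-dimensional and whose last dimension either matches the FC output channels or is 1. A standalone restatement with plain shape vectors:

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Standalone restatement: the bias must be effectively one-dimensional (its last
// dim equals its total size) and its last dim must match the output channels or be 1.
static bool biasIsFusable(const std::vector<std::size_t>& biasShape,
                          const std::vector<std::size_t>& outputShape) {
    if (biasShape.empty() || outputShape.empty()) return false;
    const std::size_t biasSize = std::accumulate(biasShape.begin(), biasShape.end(),
                                                 std::size_t{1}, std::multiplies<std::size_t>());
    return (biasShape.back() == outputShape.back() || biasShape.back() == 1) &&
           biasShape.back() == biasSize;
}
// e.g. bias {1, 256} with output {8, 256} fuses; bias {1, 1} fuses via broadcast;
// bias {256, 1} does not, because its last dim is 1 but its total size is 256.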


@ -131,7 +131,7 @@ public:
const float dequantizationMul, const float dequantizationMul,
const float dequantizationSub, const float dequantizationSub,
const ngraph::element::Type originalPrecision, const ngraph::element::Type originalPrecision,
const ngraph::PartialShape dataNodeOutputShape, const ngraph::PartialShape& dataNodeOutputShape,
element::Type precision, element::Type precision,
const element::Type deqPrecision = element::f32, const element::Type deqPrecision = element::f32,
std::shared_ptr<ngraph::Node> input = nullptr); std::shared_ptr<ngraph::Node> input = nullptr);


@ -0,0 +1,26 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <ngraph/ngraph.hpp>
#include "layer_transformation.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
class LP_TRANSFORMATIONS_API PadTransformation : public LayerTransformation {
public:
NGRAPH_RTTI_DECLARATION;
PadTransformation(const Params& params = Params());
bool transform(TransformationContext& context, pattern::Matcher& m) override;
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
};
} // namespace low_precision
} // namespace pass
} // namespace ngraph


@ -17,11 +17,13 @@ class LP_TRANSFORMATIONS_API AvgPoolPrecisionPreservedAttribute : public Precisi
}; };
using AvgPoolPrecisionPreservedAttributePtr = std::shared_ptr<AvgPoolPrecisionPreservedAttribute>; using AvgPoolPrecisionPreservedAttributePtr = std::shared_ptr<AvgPoolPrecisionPreservedAttribute>;
} // namespace ngraph
extern template class LP_TRANSFORMATIONS_API VariantImpl<AvgPoolPrecisionPreservedAttributePtr>; namespace ov {
extern template class LP_TRANSFORMATIONS_API VariantImpl<ngraph::AvgPoolPrecisionPreservedAttributePtr>;
template<> template<>
class LP_TRANSFORMATIONS_API VariantWrapper<AvgPoolPrecisionPreservedAttributePtr> : public VariantImpl<AvgPoolPrecisionPreservedAttributePtr> { class LP_TRANSFORMATIONS_API VariantWrapper<ngraph::AvgPoolPrecisionPreservedAttributePtr> : public VariantImpl<ngraph::AvgPoolPrecisionPreservedAttributePtr> {
public: public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::AvgPoolPrecisionPreserved", 0 }; static constexpr VariantTypeInfo type_info{ "LowPrecision::AvgPoolPrecisionPreserved", 0 };
@ -31,9 +33,9 @@ public:
VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {} VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
AvgPoolPrecisionPreservedAttributePtr get() { return this->m_value; } ngraph::AvgPoolPrecisionPreservedAttributePtr get() { return this->m_value; }
void merge(std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AvgPoolPrecisionPreservedAttribute>>>>& attributes); void merge(std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<ngraph::AvgPoolPrecisionPreservedAttribute>>>>& attributes);
std::string to_string() override; std::string to_string() override;
}; };
} // namespace ngraph } // namespace ov


@ -62,12 +62,15 @@ public:
}; };
using IntervalsAlignmentAttributePtr = std::shared_ptr<IntervalsAlignmentAttribute>; using IntervalsAlignmentAttributePtr = std::shared_ptr<IntervalsAlignmentAttribute>;
} // namespace ngraph
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<IntervalsAlignmentAttributePtr>; namespace ov {
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<ngraph::IntervalsAlignmentAttributePtr>;
template<> template<>
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>> : class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<ngraph::IntervalsAlignmentAttribute>> :
public VariantImpl<std::shared_ptr<IntervalsAlignmentAttribute>> { public VariantImpl<std::shared_ptr<ngraph::IntervalsAlignmentAttribute>> {
public: public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::IntervalsAlignment", 0 }; static constexpr VariantTypeInfo type_info{ "LowPrecision::IntervalsAlignment", 0 };
@ -77,12 +80,13 @@ public:
VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {} VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
std::shared_ptr<IntervalsAlignmentAttribute> get() const { return this->m_value; } std::shared_ptr<ngraph::IntervalsAlignmentAttribute> get() const { return this->m_value; }
static std::shared_ptr<VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>>> create( static std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::IntervalsAlignmentAttribute>>> create(
const std::shared_ptr<ngraph::Node>& node, const std::shared_ptr<ngraph::Node>& node,
const AttributeParameters& params); const AttributeParameters& params);
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>>>>& attributes); void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::IntervalsAlignmentAttribute>>>>& attributes);
std::string to_string() override; std::string to_string() override;
}; };
} // namespace ngraph
} // namespace ov


@ -16,11 +16,14 @@
namespace ngraph { namespace ngraph {
class LP_TRANSFORMATIONS_API PerTensorQuantizationAttribute { class LP_TRANSFORMATIONS_API PerTensorQuantizationAttribute {
}; };
} // namespace ngraph
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<PerTensorQuantizationAttribute>; namespace ov {
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<ngraph::PerTensorQuantizationAttribute>;
template<> template<>
class LP_TRANSFORMATIONS_API VariantWrapper<PerTensorQuantizationAttribute> : public VariantImpl<PerTensorQuantizationAttribute> { class LP_TRANSFORMATIONS_API VariantWrapper<ngraph::PerTensorQuantizationAttribute> : public VariantImpl<ngraph::PerTensorQuantizationAttribute> {
public: public:
static constexpr VariantTypeInfo type_info { "LowPrecision::PerTensorQuantization", 0 }; static constexpr VariantTypeInfo type_info { "LowPrecision::PerTensorQuantization", 0 };
@ -30,4 +33,5 @@ public:
return type_info; return type_info;
} }
}; };
} // namespace ngraph
} // namespace ov


@ -31,10 +31,14 @@ public:
using PrecisionPreservedAttributePtr = std::shared_ptr<PrecisionPreservedAttribute>; using PrecisionPreservedAttributePtr = std::shared_ptr<PrecisionPreservedAttribute>;
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<PrecisionPreservedAttributePtr>; } // namespace ngraph
namespace ov {
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<ngraph::PrecisionPreservedAttributePtr>;
template<> template<>
class LP_TRANSFORMATIONS_API VariantWrapper<PrecisionPreservedAttributePtr> : public VariantImpl<PrecisionPreservedAttributePtr> { class LP_TRANSFORMATIONS_API VariantWrapper<ngraph::PrecisionPreservedAttributePtr> : public VariantImpl<ngraph::PrecisionPreservedAttributePtr> {
public: public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::PrecisionPreserved", 0 }; static constexpr VariantTypeInfo type_info{ "LowPrecision::PrecisionPreserved", 0 };
@ -44,8 +48,9 @@ public:
VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {} VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
PrecisionPreservedAttributePtr get() { return this->m_value; } ngraph::PrecisionPreservedAttributePtr get() { return this->m_value; }
std::string to_string() override; std::string to_string() override;
}; };
} // namespace ngraph
} // namespace ov


@ -34,11 +34,14 @@ public:
static const std::vector<ngraph::element::Type> defaultPrecisions; static const std::vector<ngraph::element::Type> defaultPrecisions;
PrecisionsAttribute(const std::vector<ngraph::element::Type>& precisions = defaultPrecisions); PrecisionsAttribute(const std::vector<ngraph::element::Type>& precisions = defaultPrecisions);
}; };
} // namespace ngraph
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<std::shared_ptr<PrecisionsAttribute>>; namespace ov {
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<std::shared_ptr<ngraph::PrecisionsAttribute>>;
template<> template<>
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<PrecisionsAttribute>> : public VariantImpl<std::shared_ptr<PrecisionsAttribute>> { class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<ngraph::PrecisionsAttribute>> : public VariantImpl<std::shared_ptr<ngraph::PrecisionsAttribute>> {
public: public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::Precisions", 0 }; static constexpr VariantTypeInfo type_info{ "LowPrecision::Precisions", 0 };
@ -50,15 +53,16 @@ public:
std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override; std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override;
std::shared_ptr<PrecisionsAttribute> get() { return this->m_value; } std::shared_ptr<ngraph::PrecisionsAttribute> get() { return this->m_value; }
// create attribute instance for node // create attribute instance for node
static std::shared_ptr<VariantWrapper<std::shared_ptr<PrecisionsAttribute>>> create( static std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::PrecisionsAttribute>>> create(
const std::shared_ptr<ngraph::Node>& node, const std::shared_ptr<ngraph::Node>& node,
const AttributeParameters& params); const AttributeParameters& params);
// merge attribute instances which can be got from different sources: node, input port or output port // merge attribute instances which can be got from different sources: node, input port or output port
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>>& attributes); void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::PrecisionsAttribute>>>>& attributes);
// vizualize shared attributes details in VizualizeTree pass // vizualize shared attributes details in VizualizeTree pass
std::string to_string() override; std::string to_string() override;
}; };
} // namespace ngraph
} // namespace ov


@ -32,12 +32,15 @@ public:
}; };
using QuantizationAlignmentAttributePtr = std::shared_ptr<QuantizationAlignmentAttribute>; using QuantizationAlignmentAttributePtr = std::shared_ptr<QuantizationAlignmentAttribute>;
} // namespace ngraph
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<QuantizationAlignmentAttributePtr>; namespace ov {
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<ngraph::QuantizationAlignmentAttributePtr>;
template<> template<>
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>> : class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<ngraph::QuantizationAlignmentAttribute>> :
public VariantImpl<std::shared_ptr<QuantizationAlignmentAttribute>> { public VariantImpl<std::shared_ptr<ngraph::QuantizationAlignmentAttribute>> {
public: public:
static constexpr VariantTypeInfo type_info{ "LowPrecision::QuantizationAlignment", 0 }; static constexpr VariantTypeInfo type_info{ "LowPrecision::QuantizationAlignment", 0 };
@ -49,12 +52,12 @@ public:
std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override; std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override;
std::shared_ptr<QuantizationAlignmentAttribute> get() { return this->m_value; } std::shared_ptr<ngraph::QuantizationAlignmentAttribute> get() { return this->m_value; }
static std::shared_ptr<VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>>> create( static std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::QuantizationAlignmentAttribute>>> create(
const std::shared_ptr<ngraph::Node>& node, const std::shared_ptr<ngraph::Node>& node,
const AttributeParameters& params); const AttributeParameters& params);
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>>>>& attributes); void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<ngraph::QuantizationAlignmentAttribute>>>>& attributes);
std::string to_string() override; std::string to_string() override;
}; };
} // namespace ngraph } // namespace ov


@ -55,8 +55,8 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
auto convolution = m.get_match_root(); auto convolution = m.get_match_root();
if (!canConvolutionBeTransformed(context, convolution)) { if (!canConvolutionBeTransformed(context, convolution)) {
auto weightInput = convolution->get_input_node_shared_ptr(1); const auto weightInput = convolution->get_input_node_shared_ptr(1);
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightInput); const auto reshapeFromWeights = as_type_ptr<opset1::Reshape>(weightInput);
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ? FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
NetworkHelper::getDequantization(convolution, 1ul) : NetworkHelper::getDequantization(convolution, 1ul) :
NetworkHelper::getDequantization(reshapeFromWeights); NetworkHelper::getDequantization(reshapeFromWeights);
@ -69,7 +69,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
reshapeFromWeights->input_value(1), reshapeFromWeights->input_value(1),
false); false);
} }
if (as_type_ptr<opset1::Constant>(resultConstant)) { if (is_type<opset1::Constant>(resultConstant)) {
replace_node(weightInput, resultConstant); replace_node(weightInput, resultConstant);
} }
} else { } else {
@ -84,10 +84,9 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
{ {
std::shared_ptr<opset1::Subtract> subtract; std::shared_ptr<opset1::Subtract> subtract;
if (dequantization.subtract != nullptr) { if (dequantization.subtract != nullptr) {
std::shared_ptr<ngraph::Node> layer = dequantization.subtract; NetworkHelper::cleanRunTimeInfo(dequantization.subtract->shared_from_this());
ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer);
auto optimizedSubtract = NetworkHelper::optimizeSubtract(dequantization.subtract); auto optimizedSubtract = NetworkHelper::optimizeSubtract(dequantization.subtract);
if (optimizedSubtract == nullptr) { if (optimizedSubtract == nullptr) {
optimizedSubtract = dequantization.subtract; optimizedSubtract = dequantization.subtract;
} }
@ -99,7 +98,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
size_t length = subtract->get_output_partial_shape(0).rank().get_length(); size_t length = subtract->get_output_partial_shape(0).rank().get_length();
// Insert explicit broadcast for channel dimension [1] and immediately fold it // Insert explicit broadcast for channel dimension [1] and immediately fold it
Shape broadcastShape(subtract->get_output_partial_shape(0).rank().get_length(), 1); Shape broadcastShape(length, 1);
broadcastShape[1] = subtract->get_output_partial_shape(0)[1].get_length(); broadcastShape[1] = subtract->get_output_partial_shape(0)[1].get_length();
std::shared_ptr<Node> newShift = fold<opset1::Broadcast>( std::shared_ptr<Node> newShift = fold<opset1::Broadcast>(
@ -122,11 +121,9 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
const size_t groupsCount = NetworkHelper::getGroupsCount(convolution); const size_t groupsCount = NetworkHelper::getGroupsCount(convolution);
std::shared_ptr<Node> newMultiplyAfterConst; std::shared_ptr<Node> newMultiplyAfterConst;
if (groupsCount > 1ul) { if (groupsCount > 1ul) {
std::shared_ptr<opset1::Constant> multiplyConst = as_type_ptr<opset1::Constant>(dequantization.multiply->get_input_node_shared_ptr(1)); const std::vector<float> scales = dequantization.multiplyConstant->cast_vector<float>();
const std::vector<float> scales = multiplyConst->cast_vector<float>();
if (scales.size() == 1ul) { if (scales.size() == 1ul) {
newMultiplyAfterConst = dequantization.multiply->input_value(1).get_node_shared_ptr()->clone_with_new_inputs({}); newMultiplyAfterConst = dequantization.multiplyConstant->clone_with_new_inputs({});
} else { } else {
const ngraph::PartialShape inputPShape = convolution->get_input_partial_shape(0); const ngraph::PartialShape inputPShape = convolution->get_input_partial_shape(0);
const size_t inputChannelsInGroup = inputPShape[1].get_length() / groupsCount; const size_t inputChannelsInGroup = inputPShape[1].get_length() / groupsCount;
@ -150,17 +147,15 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
} }
newMultiplyAfterConst = std::make_shared<opset1::Constant>( newMultiplyAfterConst = std::make_shared<opset1::Constant>(
dequantization.multiply->get_input_element_type(1), dequantization.multiplyConstant->get_element_type(),
newMulShape, newMulShape,
outputScales); outputScales);
} }
} else { } else {
std::shared_ptr<opset1::Constant> reducedConstant = as_type_ptr<opset1::Constant>(
dequantization.multiply->input_value(1).get_node_shared_ptr());
newMultiplyAfterConst = std::make_shared<opset1::Constant>( newMultiplyAfterConst = std::make_shared<opset1::Constant>(
reducedConstant->get_output_element_type(0), dequantization.multiplyConstant->get_element_type(),
Shape{ 1 }, Shape{ 1 },
reducedConstant->cast_vector<float>()[0]); dequantization.multiplyConstant->cast_vector<float>()[0]);
} }
const auto copyNode = convolution->clone_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) }); const auto copyNode = convolution->clone_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) });
@ -190,7 +185,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
if (is_type<opset1::Convert>(convolution->get_input_node_ptr(0))) { if (is_type<opset1::Convert>(convolution->get_input_node_ptr(0))) {
auto newConvolution = convolution->clone_with_new_inputs({ auto newConvolution = convolution->clone_with_new_inputs({
convolution->get_input_node_ptr(0)->get_input_source_output(0), convolution->get_input_node_ptr(0)->input_value(0),
convolution->input_value(1)}); convolution->input_value(1)});
replace_node(convolution, newConvolution); replace_node(convolution, newConvolution);
NetworkHelper::copyInfo(convolution, newConvolution); NetworkHelper::copyInfo(convolution, newConvolution);
@ -206,7 +201,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
return false; return false;
} }
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(convolution->input_value(1).get_node_shared_ptr()); std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(convolution->get_input_node_shared_ptr(1));
dequantization = reshapeFromWeights == nullptr ? dequantization = reshapeFromWeights == nullptr ?
NetworkHelper::getDequantization(convolution, 1ul) : NetworkHelper::getDequantization(convolution, 1ul) :
@ -221,12 +216,15 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
std::shared_ptr<opset1::Multiply> multiplyFromWeights = as_type_ptr<opset1::Multiply>( std::shared_ptr<opset1::Multiply> multiplyFromWeights = as_type_ptr<opset1::Multiply>(
reshapeFromWeights == nullptr ? reshapeFromWeights == nullptr ?
convolution->input_value(1).get_node_shared_ptr() : convolution->get_input_node_shared_ptr(1) :
convolution->get_input_node_ptr(1)->get_input_node_shared_ptr(0)); convolution->get_input_node_ptr(1)->get_input_node_shared_ptr(0));
std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0)); std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));
{ {
Shape newScaleShape = multiplyFromWeights->get_input_shape(1); const auto newScalePShape = multiplyFromWeights->get_input_partial_shape(1);
assert(newScalePShape.is_static());
Shape newScaleShape = newScalePShape.to_shape();
if (!newScaleShape.empty()) { if (!newScaleShape.empty()) {
// that's all we need: [C, 1, 1, 1] => [C, 1, 1] // that's all we need: [C, 1, 1, 1] => [C, 1, 1]
newScaleShape.pop_back(); newScaleShape.pop_back();
@ -268,9 +266,12 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
} else { } else {
subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract); subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract);
const Shape weightsShape = subtractFromWeights->input(0).get_shape(); const auto weightsPShape = subtractFromWeights->get_input_partial_shape(0);
Shape zeroPointShape(weightsShape.size(), 1ul); assert(weightsPShape.is_static());
zeroPointShape[0] = weightsShape[0];
const size_t weightsRankValue = weightsPShape.rank().get_length();
Shape zeroPointShape(weightsRankValue, 1ul);
zeroPointShape[0] = static_cast<size_t>(weightsPShape[0].get_length());
auto zeroPointConstant = fold<opset1::Broadcast>( auto zeroPointConstant = fold<opset1::Broadcast>(
subtractFromWeights->input_value(1), subtractFromWeights->input_value(1),
@ -288,7 +289,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
std::shared_ptr<Node> childNode = reshapeFromWeights == nullptr ? convolution : reshapeFromWeights; std::shared_ptr<Node> childNode = reshapeFromWeights == nullptr ? convolution : reshapeFromWeights;
auto newConvolution = convolution->clone_with_new_inputs({ auto newConvolution = convolution->clone_with_new_inputs({
convolution->get_input_source_output(0), convolution->input_value(0),
childNode.get() == convolution.get() ? childNode.get() == convolution.get() ?
convolution->get_input_node_ptr(1)->input_value(0) : convolution->get_input_node_ptr(1)->input_value(0) :
childNode->copy_with_new_inputs({convertFromWeights->input_value(0), childNode->input_value(1)})}); childNode->copy_with_new_inputs({convertFromWeights->input_value(0), childNode->input_value(1)})});
@ -311,7 +312,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter( std::shared_ptr<ngraph::opset1::Multiply> finalDequantization = NetworkHelper::optimizeMultipliesAfter(
convolution->output(0).get_target_inputs().begin()->get_node()->shared_from_this()); convolution->output(0).get_target_inputs().begin()->get_node()->shared_from_this());
ngraph::copy_runtime_info({ convolution, finalDequantization }, finalDequantization); copy_runtime_info({ convolution, finalDequantization }, finalDequantization);
updateOutput(context, finalDequantization, convolution); updateOutput(context, finalDequantization, convolution);
// [C, 1, 1] -> [1, C, 1, 1] // [C, 1, 1] -> [1, C, 1, 1]
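The zero-point hunk above rebuilds the broadcast shape for the weights' zero point from the weights' partial shape: a vector of ones with the weights' rank, where only the output-channel dimension keeps its extent (index 0 for Convolution weights, index 1 for ConvolutionBackpropData). A minimal standalone sketch of that shape computation, using plain std::vector instead of the ngraph Shape/PartialShape types and assuming the weights shape is fully static:

#include <cassert>
#include <cstddef>
#include <vector>

// Broadcast shape for a per-channel zero point: all ones except the channel
// dimension, which keeps the weights' extent along that axis.
std::vector<std::size_t> zeroPointShapeFor(const std::vector<std::size_t>& weightsShape,
                                           std::size_t channelIdx) {
    assert(channelIdx < weightsShape.size());
    std::vector<std::size_t> zeroPointShape(weightsShape.size(), 1u);
    zeroPointShape[channelIdx] = weightsShape[channelIdx];
    return zeroPointShape;
}

int main() {
    // e.g. Convolution weights [64, 3, 7, 7] -> zero-point shape [64, 1, 1, 1]
    const auto shape = zeroPointShapeFor({64, 3, 7, 7}, 0);
    assert((shape == std::vector<std::size_t>{64, 1, 1, 1}));
    return 0;
}

In the transformation itself this shape feeds fold<opset1::Broadcast> so the scalar zero point becomes an explicit per-channel constant.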

View File

@ -87,7 +87,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
reshapeFromWeights->input_value(1), reshapeFromWeights->input_value(1),
false); false);
} }
if (as_type_ptr<opset1::Constant>(resultConstant)) { if (is_type<opset1::Constant>(resultConstant)) {
replace_node(weightsInput, resultConstant); replace_node(weightsInput, resultConstant);
} }
} else { } else {
@ -100,16 +100,14 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData); FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
{ {
if (dequantization.subtract != nullptr) { if (dequantization.subtract != nullptr) {
std::shared_ptr<ngraph::Node> layer = dequantization.subtract; NetworkHelper::cleanRunTimeInfo(dequantization.subtract->shared_from_this());
ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(layer);
NetworkHelper::optimizeSubtract(dequantization.subtract); NetworkHelper::optimizeSubtract(dequantization.subtract);
} }
std::shared_ptr<opset1::Constant> reducedConstant = as_type_ptr<opset1::Constant>(dequantization.multiplyConstant);
std::shared_ptr<Node> newMultiplyAfterConst = std::make_shared<opset1::Constant>( std::shared_ptr<Node> newMultiplyAfterConst = std::make_shared<opset1::Constant>(
reducedConstant->get_output_element_type(0), dequantization.multiplyConstant->get_element_type(),
Shape{ 1 }, Shape{ 1 },
reducedConstant->cast_vector<float>()[0]); dequantization.multiplyConstant->cast_vector<float>()[0]);
auto inputs = convolutionBackpropData->input_values(); auto inputs = convolutionBackpropData->input_values();
inputs[0] = dequantization.multiply->input_value(0); inputs[0] = dequantization.multiply->input_value(0);
const auto copyNode = convolutionBackpropData->copy_with_new_inputs(inputs); const auto copyNode = convolutionBackpropData->copy_with_new_inputs(inputs);
@ -126,7 +124,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get()); ngraph::op::TemporaryReplaceOutputType(newMultiplyAfterConst, deqPrecision).get());
replace_node(convolutionBackpropData, newMultiplyAfter); replace_node(convolutionBackpropData, newMultiplyAfter);
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr(); convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0); inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
if (is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) { if (is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
auto newConvolution = convolutionBackpropData->copy_with_new_inputs(inputs); auto newConvolution = convolutionBackpropData->copy_with_new_inputs(inputs);
@ -137,7 +135,6 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
{ {
decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul); decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);
dequantization = NetworkHelper::getDequantization(convolutionBackpropData, 1ul); dequantization = NetworkHelper::getDequantization(convolutionBackpropData, 1ul);
if (is_type<opset1::FakeQuantize>(dequantization.data.get_node())) { if (is_type<opset1::FakeQuantize>(dequantization.data.get_node())) {
@ -152,7 +149,10 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0)); std::shared_ptr<opset1::Subtract> subtractFromWeights = as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));
{ {
Shape newScaleShape = multiplyFromWeights->get_input_shape(1); const auto newScalePShape = multiplyFromWeights->get_input_partial_shape(1);
assert(newScalePShape.is_static());
Shape newScaleShape = newScalePShape.to_shape();
auto inputs = convolutionBackpropData->input_values(); auto inputs = convolutionBackpropData->input_values();
inputs[1] = multiplyFromWeights->input_value(0); inputs[1] = multiplyFromWeights->input_value(0);
auto newMultiplyAfter = std::make_shared<DequantizationMultiply>( auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
@ -164,7 +164,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
false), false),
convolutionBackpropData->get_output_element_type(0))); convolutionBackpropData->get_output_element_type(0)));
replace_node(convolutionBackpropData, newMultiplyAfter); replace_node(convolutionBackpropData, newMultiplyAfter);
convolutionBackpropData = newMultiplyAfter->input_value(0).get_node_shared_ptr(); convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
} }
if (subtractFromWeights != nullptr) { if (subtractFromWeights != nullptr) {
@ -175,9 +175,12 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
} else { } else {
subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract); subtractFromWeights = as_type_ptr<opset1::Subtract>(optimizedSubtract);
const Shape weightsShape = subtractFromWeights->input(0).get_shape(); const auto weightsPShape = subtractFromWeights->get_input_partial_shape(0);
Shape zeroPointShape(weightsShape.size(), 1ul); assert(weightsPShape.is_static());
zeroPointShape[1] = weightsShape[1];
const size_t weightsRankValue = weightsPShape.rank().get_length();
Shape zeroPointShape(weightsRankValue, 1ul);
zeroPointShape[1] = static_cast<size_t>(weightsPShape[1].get_length());
auto zeroPointConstant = fold<opset1::Broadcast>( auto zeroPointConstant = fold<opset1::Broadcast>(
subtractFromWeights->get_input_node_shared_ptr(1), subtractFromWeights->get_input_node_shared_ptr(1),
@ -215,7 +218,6 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared<ngraph::VariantWrapper<std::string>>(""); rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared<ngraph::VariantWrapper<std::string>>("");
} }
return true; return true;
} }
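Both convolution hunks above repeat the same refactoring pattern: query get_input_partial_shape instead of get_input_shape, assert that the shape is static at this point of the pipeline, and only then convert it to a concrete Shape. A hedged standalone sketch of that pattern with ngraph types (expectStaticShape is an illustrative helper, not part of the library):

#include <cassert>
#include <ngraph/ngraph.hpp>

// Illustrative helper: convert a PartialShape that is expected to be static
// into a concrete Shape, asserting that expectation.
ngraph::Shape expectStaticShape(const ngraph::PartialShape& pshape) {
    assert(pshape.is_static());
    return pshape.to_shape();
}

int main() {
    const ngraph::PartialShape staticShape{1, 3, 224, 224};
    const ngraph::Shape shape = expectStaticShape(staticShape);
    assert(shape.size() == 4);

    // A shape with a dynamic batch dimension fails the assertion above, which
    // is why the transformations check is_static() before calling to_shape().
    const ngraph::PartialShape dynamicShape{ngraph::Dimension::dynamic(), 3, 224, 224};
    assert(!dynamicShape.is_static());
    return 0;
}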

View File

@ -56,8 +56,10 @@ bool FakeQuantizeTransformation::transform(TransformationContext& context, ngrap
namespace fq { namespace fq {
static std::shared_ptr<Node> updateShape(std::shared_ptr<Node> constantOp, const PartialShape& targetShape) { static std::shared_ptr<Node> updateShape(std::shared_ptr<Node> constantOp, const PartialShape& targetShape) {
assert(constantOp->get_output_partial_shape(0).is_static());
const Shape shape = constantOp->get_output_shape(0); const Shape shape = constantOp->get_output_shape(0);
if ((shape.size() < static_cast<size_t>(targetShape.rank().get_length())) && (shape.size() > 1ul)) {
if ((shape.size() > 1ul) && (shape.size() < static_cast<size_t>(targetShape.rank().get_length()))) {
constantOp = fold<opset1::Unsqueeze>( constantOp = fold<opset1::Unsqueeze>(
constantOp, constantOp,
std::make_shared<opset1::Constant>(ngraph::element::i32, Shape{ 1 }, std::vector<size_t>({ 0ul }))); std::make_shared<opset1::Constant>(ngraph::element::i32, Shape{ 1 }, std::vector<size_t>({ 0ul })));
@ -93,19 +95,19 @@ static std::shared_ptr<opset1::Constant> getConstant(const std::shared_ptr<Node>
} // namespace fq } // namespace fq
bool FakeQuantizeTransformation::checkElementwise(const std::shared_ptr<Node>& eltwise) { bool FakeQuantizeTransformation::checkElementwise(const std::shared_ptr<Node>& eltwise) {
const std::shared_ptr<opset1::Constant> constant = fq::getConstant(eltwise);
if (constant == nullptr) {
return false;
}
Shape shape = constant->get_shape();
if (shape_size(shape) != 1ul) {
const auto eltwiseInputPShape = eltwise->get_input_partial_shape(0); const auto eltwiseInputPShape = eltwise->get_input_partial_shape(0);
const auto eltwiseOutputPShape = eltwise->get_output_partial_shape(0); const auto eltwiseOutputPShape = eltwise->get_output_partial_shape(0);
if (eltwiseInputPShape != eltwiseOutputPShape || eltwiseInputPShape.rank().is_dynamic() || eltwiseOutputPShape.rank().is_dynamic()) { if (eltwiseInputPShape != eltwiseOutputPShape || eltwiseInputPShape.rank().is_dynamic() || eltwiseOutputPShape.rank().is_dynamic()) {
return false; return false;
} }
std::shared_ptr<opset1::Constant> constant = fq::getConstant(eltwise);
if (constant == nullptr) {
return false;
}
Shape shape = constant->get_output_shape(0);
if ((!shape.empty()) && (shape_size(shape) != 1ul)) {
if ((eltwiseOutputPShape.rank().get_length() - shape.size()) > 1) { if ((eltwiseOutputPShape.rank().get_length() - shape.size()) > 1) {
return false; return false;
} }
@ -179,8 +181,8 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
inputHighConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputHighConst_f32, value), fakeQuantize->get_output_partial_shape(0)); inputHighConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputHighConst_f32, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Convert>(eltwise)) { } else if (is_type<opset1::Convert>(eltwise)) {
// issue #40611 // issue #40611
if ((eltwise->input(0).get_element_type() == element::i32) && if ((eltwise->get_input_element_type(0) == element::i32) &&
((eltwise->output(0).get_element_type() == element::f16) || (eltwise->output(0).get_element_type() == element::f32))) { ((eltwise->get_output_element_type(0) == element::f16) || (eltwise->get_output_element_type(0) == element::f32))) {
return nullptr; return nullptr;
} }
} else { } else {
@ -190,7 +192,7 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
const auto data = fq::getData(eltwise); const auto data = fq::getData(eltwise);
const size_t outputIdx = NetworkHelper::getParentOutputIndex(data, eltwise); const size_t outputIdx = NetworkHelper::getParentOutputIndex(data, eltwise);
std::shared_ptr<opset1::FakeQuantize> newFakeQuantize = as_type_ptr<opset1::FakeQuantize>(fakeQuantize->clone_with_new_inputs({ const auto newFakeQuantize = as_type_ptr<opset1::FakeQuantize>(fakeQuantize->clone_with_new_inputs({
data->output(outputIdx), data->output(outputIdx),
inputLowConst_f32, inputLowConst_f32,
inputHighConst_f32, inputHighConst_f32,

View File

@ -90,7 +90,7 @@ bool FakeQuantizeDequantization::checkShape(const std::shared_ptr<ngraph::Node>&
if (!inPShape.rank().is_dynamic()) { if (!inPShape.rank().is_dynamic()) {
for (int i = 0; i < inPShape.rank().get_length(); ++i) { for (int i = 0; i < inPShape.rank().get_length(); ++i) {
if (inPShape[i] != outPShape[i] && !inPShape.is_dynamic()) { if (inPShape[i] != outPShape[i] && !inPShape[i].is_dynamic()) {
return false; return false;
} }
} }
@ -108,7 +108,7 @@ bool FakeQuantizeDequantization::checkElementwise(const std::shared_ptr<ngraph::
return false; return false;
} }
const ngraph::Shape constShape = constant->get_output_shape(0); const ngraph::Shape constShape = constant->get_shape();
if ((constShape.size() > 5ul)) { if ((constShape.size() > 5ul)) {
return false; return false;
} }
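The checkShape change above matters because a PartialShape with one dynamic dimension is dynamic as a whole, while its remaining dimensions can still be compared. A small sketch of that distinction, assuming only the batch dimension is dynamic:

#include <cassert>
#include <ngraph/ngraph.hpp>

int main() {
    using ngraph::Dimension;
    using ngraph::PartialShape;

    const PartialShape inShape{Dimension::dynamic(), 3, 16, 16};
    const PartialShape outShape{Dimension::dynamic(), 3, 32, 32};

    // The shape-level check is true here, so it would skip every comparison ...
    assert(inShape.is_dynamic());

    // ... while the per-dimension check still catches the spatial mismatch
    // and correctly ignores the dynamic batch dimension.
    assert(inShape[0].is_dynamic());
    assert(!inShape[2].is_dynamic());
    assert(inShape[2] != outShape[2]);
    return 0;
}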

View File

@ -40,8 +40,12 @@ bool FuseFakeQuantizeTransformation::transform(TransformationContext& context, n
namespace fuse_fq { namespace fuse_fq {
std::shared_ptr<Node> updateShape(std::shared_ptr<Node> op, const Shape& targetShape) { std::shared_ptr<Node> updateShape(std::shared_ptr<Node> op, const PartialShape& targetPShape) {
assert(targetPShape.is_static());
assert(op->get_output_partial_shape(0).is_static());
const Shape targetShape = targetPShape.to_shape();
const Shape shape = op->get_output_shape(0); const Shape shape = op->get_output_shape(0);
if ((shape.size() < targetShape.size()) && (shape.size() > 1ul)) { if ((shape.size() < targetShape.size()) && (shape.size() > 1ul)) {
op = fold<opset1::Unsqueeze>( op = fold<opset1::Unsqueeze>(
op, op,
@ -81,14 +85,19 @@ bool eltwiseWithConstant(const std::shared_ptr<Node>& eltwise) {
return false; return false;
} }
Shape shape = constant->get_output_shape(0); Shape shape = constant->get_shape();
if ((!shape.empty()) && (shape_size(shape) != 1ul)) { if ((!shape.empty()) && (shape_size(shape) != 1ul)) {
const Shape eltwiseShape = eltwise->get_output_shape(0); const auto eltwisePShape = eltwise->get_output_partial_shape(0);
if ((eltwiseShape.size() - shape.size()) > 1) { if (eltwisePShape.rank().is_dynamic()) {
return false; return false;
} }
if ((eltwiseShape.size() - shape.size()) == 1ul) { const size_t eltwiseOutRank = eltwisePShape.rank().get_length();
if ((eltwiseOutRank - shape.size()) > 1) {
return false;
}
if ((eltwiseOutRank - shape.size()) == 1ul) {
shape.insert(shape.begin(), 1ul); shape.insert(shape.begin(), 1ul);
} }
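eltwiseWithConstant above tolerates a constant whose rank is exactly one less than the eltwise output rank by prepending a batch dimension of 1, and rejects any larger rank gap. A standalone sketch of that rank-alignment rule on plain vectors (alignConstantRank is an illustrative name):

#include <cstddef>
#include <vector>

// Illustrative rank alignment: reject constants that are more than one rank
// short of the eltwise output, otherwise pad the shape with a leading 1.
bool alignConstantRank(std::vector<std::size_t>& constShape, std::size_t eltwiseOutRank) {
    if (constShape.size() > eltwiseOutRank || eltwiseOutRank - constShape.size() > 1) {
        return false;
    }
    if (eltwiseOutRank - constShape.size() == 1) {
        constShape.insert(constShape.begin(), 1u);  // e.g. [C, 1, 1] -> [1, C, 1, 1]
    }
    return true;
}

int main() {
    std::vector<std::size_t> perChannel{64, 1, 1};
    const bool ok = alignConstantRank(perChannel, 4);  // 4-D eltwise output
    return (ok && perChannel.size() == 4) ? 0 : 1;
}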
@ -118,22 +127,22 @@ std::shared_ptr<opset1::FakeQuantize> FuseFakeQuantizeTransformation::handle(
constant : constant :
foldConvert(constant, eltwise->get_output_element_type(0)); foldConvert(constant, eltwise->get_output_element_type(0));
inputLowConst = fuse_fq::updateShape(fold<opset1::Divide>(inputLowConst, value), fakeQuantize->get_output_shape(0)); inputLowConst = fuse_fq::updateShape(fold<opset1::Divide>(inputLowConst, value), fakeQuantize->get_output_partial_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Divide>(inputHightConst, value), fakeQuantize->get_output_shape(0)); inputHightConst = fuse_fq::updateShape(fold<opset1::Divide>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Divide>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) { } else if (is_type<opset1::Divide>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ? const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
constant : constant :
foldConvert(constant, eltwise->get_output_element_type(0)); foldConvert(constant, eltwise->get_output_element_type(0));
inputLowConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputLowConst, value), fakeQuantize->get_output_shape(0)); inputLowConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputLowConst, value), fakeQuantize->get_output_partial_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputHightConst, value), fakeQuantize->get_output_shape(0)); inputHightConst = fuse_fq::updateShape(fold<opset1::Multiply>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Subtract>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) { } else if (is_type<opset1::Subtract>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ? const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ?
constant : constant :
foldConvert(constant, eltwise->get_output_element_type(0)); foldConvert(constant, eltwise->get_output_element_type(0));
inputLowConst = fuse_fq::updateShape(fold<opset1::Add>(inputLowConst, value), fakeQuantize->get_output_shape(0)); inputLowConst = fuse_fq::updateShape(fold<opset1::Add>(inputLowConst, value), fakeQuantize->get_output_partial_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Add>(inputHightConst, value), fakeQuantize->get_output_shape(0)); inputHightConst = fuse_fq::updateShape(fold<opset1::Add>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Add>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) { } else if (is_type<opset1::Add>(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) {
if (is_type<opset1::Convolution>(fuse_fq::getData(eltwise)) || if (is_type<opset1::Convolution>(fuse_fq::getData(eltwise)) ||
is_type<opset1::GroupConvolution>(fuse_fq::getData(eltwise))) { is_type<opset1::GroupConvolution>(fuse_fq::getData(eltwise))) {
@ -144,8 +153,8 @@ std::shared_ptr<opset1::FakeQuantize> FuseFakeQuantizeTransformation::handle(
constant : constant :
foldConvert(constant, eltwise->get_output_element_type(0)); foldConvert(constant, eltwise->get_output_element_type(0));
inputLowConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputLowConst, value), fakeQuantize->get_output_shape(0)); inputLowConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputLowConst, value), fakeQuantize->get_output_partial_shape(0));
inputHightConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputHightConst, value), fakeQuantize->get_output_shape(0)); inputHightConst = fuse_fq::updateShape(fold<opset1::Subtract>(inputHightConst, value), fakeQuantize->get_output_partial_shape(0));
} else if (is_type<opset1::Convert>(eltwise)) { } else if (is_type<opset1::Convert>(eltwise)) {
// issue #40611 // issue #40611
if ((eltwise->input(0).get_element_type() == element::i32) && (eltwise->output(0).get_element_type() == element::f32)) { if ((eltwise->input(0).get_element_type() == element::i32) && (eltwise->output(0).get_element_type() == element::f32)) {

View File

@ -72,7 +72,7 @@ bool InterpolateTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer
std::shared_ptr<opset4::Interpolate> interpolate4 = as_type_ptr<opset4::Interpolate>(layer); std::shared_ptr<opset4::Interpolate> interpolate4 = as_type_ptr<opset4::Interpolate>(layer);
if (interpolate4) { if (interpolate4) {
const auto attrs = interpolate4->get_attrs(); const auto attrs = interpolate4->get_attrs();
return attrs.mode == op::v4::Interpolate::InterpolateMode::nearest; return attrs.mode == op::v4::Interpolate::InterpolateMode::NEAREST;
} }
return false; return false;
@ -108,7 +108,7 @@ bool InterpolateTransformation::canBeTransformed(const TransformationContext& co
if (interpolate4) { if (interpolate4) {
const auto interpAttrs = interpolate4->get_attrs(); const auto interpAttrs = interpolate4->get_attrs();
if (interpAttrs.mode != op::v4::Interpolate::InterpolateMode::nearest) { if (interpAttrs.mode != op::v4::Interpolate::InterpolateMode::NEAREST) {
return false; return false;
} }
@ -126,7 +126,7 @@ bool InterpolateTransformation::canBeTransformed(const TransformationContext& co
} }
} }
if (interpAttrs.coordinate_transformation_mode == op::v4::Interpolate::CoordinateTransformMode::align_corners) { if (interpAttrs.coordinate_transformation_mode == op::v4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS) {
return false; return false;
} }
} }

View File

@ -50,6 +50,7 @@
#include "low_precision/multiply.hpp" #include "low_precision/multiply.hpp"
#include "low_precision/mvn.hpp" #include "low_precision/mvn.hpp"
#include "low_precision/normalize_l2.hpp" #include "low_precision/normalize_l2.hpp"
#include "low_precision/pad.hpp"
#include "low_precision/prelu.hpp" #include "low_precision/prelu.hpp"
#include "low_precision/reduce_max.hpp" #include "low_precision/reduce_max.hpp"
#include "low_precision/reduce_mean.hpp" #include "low_precision/reduce_mean.hpp"
@ -219,6 +220,7 @@ bool ngraph::pass::low_precision::LowPrecision::run_on_function(std::shared_ptr<
common->add_matcher<ngraph::pass::low_precision::MultiplyTransformation>(params); common->add_matcher<ngraph::pass::low_precision::MultiplyTransformation>(params);
common->add_matcher<ngraph::pass::low_precision::MVNTransformation>(params); common->add_matcher<ngraph::pass::low_precision::MVNTransformation>(params);
common->add_matcher<ngraph::pass::low_precision::NormalizeL2Transformation>(params); common->add_matcher<ngraph::pass::low_precision::NormalizeL2Transformation>(params);
common->add_matcher<ngraph::pass::low_precision::PadTransformation>(params);
common->add_matcher<ngraph::pass::low_precision::PReluTransformation>(params); common->add_matcher<ngraph::pass::low_precision::PReluTransformation>(params);
common->add_matcher<ngraph::pass::low_precision::ReduceMaxTransformation>(params); common->add_matcher<ngraph::pass::low_precision::ReduceMaxTransformation>(params);
common->add_matcher<ngraph::pass::low_precision::ReduceMeanTransformation>(params); common->add_matcher<ngraph::pass::low_precision::ReduceMeanTransformation>(params);

View File

@ -141,6 +141,7 @@ bool ngraph::pass::low_precision::MarkupPrecisions::isPrecisionPreserved(const s
{ name<opset1::ReduceMin>() }, { name<opset1::ReduceMin>() },
{ name<opset1::Relu>() }, { name<opset1::Relu>() },
// TODO: there are conditions // TODO: there are conditions
{ name<opset1::Pad>() },
{ name<opset1::Reshape>() }, { name<opset1::Reshape>() },
{ name<opset1::Squeeze>() }, { name<opset1::Squeeze>() },
{ name<opset1::Split>() }, { name<opset1::Split>() },
@ -166,7 +167,7 @@ bool ngraph::pass::low_precision::MarkupPrecisions::isPrecisionPreserved(const s
std::shared_ptr<opset4::Interpolate> interpolate4 = as_type_ptr<opset4::Interpolate>(node); std::shared_ptr<opset4::Interpolate> interpolate4 = as_type_ptr<opset4::Interpolate>(node);
if (interpolate4) { if (interpolate4) {
const auto attrs = interpolate4->get_attrs(); const auto attrs = interpolate4->get_attrs();
return attrs.mode == op::v4::Interpolate::InterpolateMode::nearest; return attrs.mode == op::v4::Interpolate::InterpolateMode::NEAREST;
} }
} }
@ -194,6 +195,7 @@ bool ngraph::pass::low_precision::MarkupPrecisions::isSupported(const std::share
{ name<ngraph::op::MVN>() }, { name<ngraph::op::MVN>() },
{ name<opset6::MVN>() }, { name<opset6::MVN>() },
{ name<opset1::NormalizeL2>() }, { name<opset1::NormalizeL2>() },
{ name<opset1::Pad>() },
{ name<opset1::PRelu>() }, { name<opset1::PRelu>() },
{ name<opset1::ReduceMax>() }, { name<opset1::ReduceMax>() },
{ name<opset1::ReduceMean>() }, { name<opset1::ReduceMean>() },

View File

@ -94,7 +94,10 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
Shape(dequantization1.subtract->get_output_partial_shape(0).rank().get_length(), 1) : Shape(dequantization1.subtract->get_output_partial_shape(0).rank().get_length(), 1) :
dequantization1.subtractConstant->get_shape(); dequantization1.subtractConstant->get_shape();
const auto weightsShape = newMatMul->get_input_shape(1); const auto weightsPShape = newMatMul->get_input_partial_shape(1);
assert(weightsPShape.is_static());
const auto weightsShape = weightsPShape.to_shape();
const size_t firstWeightsIdx = matMul->get_transpose_b() ? weightsShape.size() - 1ul : weightsShape.size() - 2ul; const size_t firstWeightsIdx = matMul->get_transpose_b() ? weightsShape.size() - 1ul : weightsShape.size() - 2ul;
const size_t lastDataIdx = matMul->get_transpose_a() ? broadcastShape.size() - 2 : broadcastShape.size() - 1; const size_t lastDataIdx = matMul->get_transpose_a() ? broadcastShape.size() - 2 : broadcastShape.size() - 1;
broadcastShape[lastDataIdx] = weightsShape[firstWeightsIdx]; broadcastShape[lastDataIdx] = weightsShape[firstWeightsIdx];
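In the hunk above, firstWeightsIdx and lastDataIdx both address the contraction (K) axis: transpose_b moves K to the last weights dimension, transpose_a moves K to the second-to-last data dimension, and the broadcast shape for the data-side subtract constant is widened along that axis to the weights' K extent. A tiny index-selection sketch, assuming static ranks:

#include <cassert>
#include <cstddef>

// Index of the contraction (K) axis in each MatMul input, mirroring how the
// transformation derives firstWeightsIdx / lastDataIdx from the transpose flags.
std::size_t weightsContractionIndex(std::size_t weightsRank, bool transposeB) {
    return transposeB ? weightsRank - 1 : weightsRank - 2;  // [N, K] vs [K, N]
}
std::size_t dataContractionIndex(std::size_t dataRank, bool transposeA) {
    return transposeA ? dataRank - 2 : dataRank - 1;        // [K, M] vs [M, K]
}

int main() {
    // data [B, M, K] x weights [K, N]: both indices point at K.
    assert(dataContractionIndex(3, /*transposeA=*/false) == 2);
    assert(weightsContractionIndex(2, /*transposeB=*/false) == 0);
    return 0;
}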
@ -118,8 +121,8 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
parent = newSubtract; parent = newSubtract;
} }
auto transpose = [](const std::shared_ptr<Node>& node) -> std::shared_ptr<Node> { auto transpose = [](const std::shared_ptr<opset1::Constant>& node) -> std::shared_ptr<Node> {
const Shape outputShape = node->get_output_shape(0); const Shape outputShape = node->get_shape();
if (outputShape.size() < 2ul) { if (outputShape.size() < 2ul) {
return node; return node;
} }
@ -153,7 +156,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
} }
} }
const auto newMulConst = NetworkHelper::toScalarIfPossible(fold<ngraph::opset1::Multiply>( const auto newMulConst = NetworkHelper::toScalarIfPossible(fold<opset1::Multiply>(
mulConst1, mulConst1,
foldConvert(mulConst2, element::f32))); foldConvert(mulConst2, element::f32)));

View File

@ -164,17 +164,17 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformed(const Transforma
Shape constShape; Shape constShape;
int inputIndex; int inputIndex;
if (is_type<opset1::Constant>(operation->get_input_node_shared_ptr(1))) { if (const auto constant = as_type_ptr<opset1::Constant>(operation->get_input_node_shared_ptr(1))) {
inputIndex = 0; inputIndex = 0;
constShape = operation->get_input_shape(1); constShape = constant->get_shape();
if (is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)) || if (is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)) ||
(is_type<opset1::Subtract>(operation->get_input_node_shared_ptr(0)) && (is_type<opset1::Subtract>(operation->get_input_node_shared_ptr(0)) &&
is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)->get_input_node_shared_ptr(0)))) { is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)->get_input_node_shared_ptr(0)))) {
return false; return false;
} }
} else if (is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0))) { } else if (const auto constant = as_type_ptr<opset1::Constant>(operation->get_input_node_shared_ptr(0))) {
inputIndex = 1; inputIndex = 1;
constShape = operation->get_input_shape(0); constShape = constant->get_shape();
} else { } else {
return false; return false;
} }

View File

@ -191,12 +191,12 @@ size_t NetworkHelper::getInputChannelsCount(std::shared_ptr<Node> layer) {
} }
size_t NetworkHelper::getGroupsCount(std::shared_ptr<Node> layer) { size_t NetworkHelper::getGroupsCount(std::shared_ptr<Node> layer) {
if (as_type_ptr<opset1::Convolution>(layer)) { if (is_type<opset1::Convolution>(layer)) {
return 1; return 1;
} else if (auto group_convolution = as_type_ptr<opset1::GroupConvolution>(layer)) { } else if (is_type<opset1::GroupConvolution>(layer)) {
return layer->get_input_shape(1)[0]; // input weights for opset1::GC is in format GOI..., see the specification return layer->get_input_partial_shape(1)[0].get_length(); // input weights for opset1::GC is in format GOI..., see the specification
} else { } else {
THROW_TRANSFORMATION_EXCEPTION << "Invalid layer type of " << layer->get_friendly_name() << "; expected Convolutino or GroupConvolution"; THROW_TRANSFORMATION_EXCEPTION << "Invalid layer type of " << layer->get_friendly_name() << "; expected Convolution or GroupConvolution";
} }
} }
@ -239,9 +239,15 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
auto b = addAfterMultiply->get_input_node_shared_ptr(multiplyBranch == 0 ? 1 : 0); auto b = addAfterMultiply->get_input_node_shared_ptr(multiplyBranch == 0 ? 1 : 0);
std::shared_ptr<Node> bDivA; std::shared_ptr<Node> bDivA;
if (shape_size(b->get_output_shape(0)) == 1 || const auto aPShape = a->get_output_partial_shape(0);
shape_size(a->get_output_shape(0)) == 1 || assert(aPShape.is_static());
shape_size(b->get_output_shape(0)) == shape_size(a->get_output_shape(0))) { const auto aShape = aPShape.to_shape();
const auto bPShape = b->get_output_partial_shape(0);
assert(bPShape.is_static());
const auto bShape = bPShape.to_shape();
if ((shape_size(bShape) == 1) || (shape_size(aShape) == 1) || (shape_size(bShape) == shape_size(aShape))) {
// safe division to avoid NaN // safe division to avoid NaN
const std::vector<float> bValues = as_type_ptr<opset1::Constant>(b)->cast_vector<float>(); const std::vector<float> bValues = as_type_ptr<opset1::Constant>(b)->cast_vector<float>();
const std::vector<float> aValues = as_type_ptr<opset1::Constant>(a)->cast_vector<float>(); const std::vector<float> aValues = as_type_ptr<opset1::Constant>(a)->cast_vector<float>();
@ -263,7 +269,7 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
auto aPrecision = a->get_output_element_type(0); auto aPrecision = a->get_output_element_type(0);
bDivA = std::make_shared<opset1::Constant>( bDivA = std::make_shared<opset1::Constant>(
aPrecision, aPrecision,
aBroadcasted ? b->get_output_shape(0) : a->get_output_shape(0), aBroadcasted ? bShape : aShape,
bDivAValues); bDivAValues);
} else { } else {
b = foldConvert(b, element::f32); b = foldConvert(b, element::f32);
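The swapMultiplyAndAdd hunks above rely on the elementwise identity (x * a) + b = (x + b / a) * a, applied when the two constants are scalar-like or of equal size, with the division guarded so a zero scale cannot produce NaN (see the safe-division comment above). A minimal numeric sketch of the rewrite on scalars:

#include <cassert>
#include <cmath>

int main() {
    const float x = 2.5f;    // data value
    const float a = 0.125f;  // multiply constant (dequantization scale)
    const float b = -3.0f;   // add constant (shift)

    // Multiply-then-add ...
    const float original = x * a + b;

    // ... equals add-then-multiply once the shift is divided by the scale,
    // which is exactly the new Add constant the transformation materializes.
    const float bDivA = b / a;  // the source guards this division against a zero scale
    const float swapped = (x + bDivA) * a;

    assert(std::fabs(original - swapped) < 1e-6f);
    return 0;
}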
@ -463,7 +469,14 @@ std::shared_ptr<ngraph::opset1::Multiply> NetworkHelper::optimizeMultipliesAfter
} }
auto newInput = multiply->input_value(1 - constant1->output(0).get_target_inputs().begin()->get_index()); auto newInput = multiply->input_value(1 - constant1->output(0).get_target_inputs().begin()->get_index());
auto newConst = fold<opset1::Multiply>(constant1, constant2); auto multiplyResult = fold<opset1::Multiply>(constant1, constant2);
{
// optimize constant shape: used in rfcn-resnet101-coco
const auto multiplyResultConstant = as_type_ptr<opset1::Constant>(multiplyResult);
if ((multiplyResultConstant != nullptr) && NetworkHelper::isScalarLike(multiplyResultConstant)) {
multiplyResult = NetworkHelper::toScalar(multiplyResultConstant);
}
}
auto inputPrecision0 = nextMultiply->get_origin_input_type(0); auto inputPrecision0 = nextMultiply->get_origin_input_type(0);
auto inputPrecision1 = nextMultiply->get_origin_input_type(1); auto inputPrecision1 = nextMultiply->get_origin_input_type(1);
auto outputPrecision = nextMultiply->get_overridden_output_type(0); auto outputPrecision = nextMultiply->get_overridden_output_type(0);
@ -472,7 +485,7 @@ std::shared_ptr<ngraph::opset1::Multiply> NetworkHelper::optimizeMultipliesAfter
std::vector<element::Type>{ inputPrecision0, inputPrecision1 }, std::vector<element::Type>{ inputPrecision0, inputPrecision1 },
std::vector<element::Type>{ outputPrecision }, std::vector<element::Type>{ outputPrecision },
ngraph::op::TemporaryReplaceOutputType(newInput, inputPrecision0).get(), ngraph::op::TemporaryReplaceOutputType(newInput, inputPrecision0).get(),
ngraph::op::TemporaryReplaceOutputType(newConst, inputPrecision1).get()); ngraph::op::TemporaryReplaceOutputType(multiplyResult, inputPrecision1).get());
copy_runtime_info(multiply, newMultiply); copy_runtime_info(multiply, newMultiply);
replace_node(nextMultiply, newMultiply); replace_node(nextMultiply, newMultiply);
return newMultiply; return newMultiply;
@ -734,9 +747,12 @@ std::shared_ptr<Node> NetworkHelper::foldFakeQuantize(
auto constant = as_type_ptr<opset1::Constant>(fq->get_input_node_shared_ptr(0)); auto constant = as_type_ptr<opset1::Constant>(fq->get_input_node_shared_ptr(0));
if (constant) { if (constant) {
const bool roundValues = roundValuesWasSet ? roundValuesArg : fq->output(0).get_element_type().is_integral(); const bool roundValues = roundValuesWasSet ? roundValuesArg : fq->get_output_element_type(0).is_integral();
const auto constPShape = fq->get_output_partial_shape(0);
assert(constPShape.is_static());
const Shape constShape = constPShape.to_shape();
Shape constShape = fq->get_output_shape(0);
if (constShape.empty() || constShape.size() > 5lu) { if (constShape.empty() || constShape.size() > 5lu) {
THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size(); THROW_IE_LPT_EXCEPTION(*fq) << "Unexpected dimensions count " << constShape.size();
} }
@ -1117,7 +1133,7 @@ FakeQuantizeDequantization NetworkHelper::makeDequantization(
const float dequantizationMul, const float dequantizationMul,
const float dequantizationSub, const float dequantizationSub,
const ngraph::element::Type originalPrecision, const ngraph::element::Type originalPrecision,
const ngraph::PartialShape dataNodeOutputShape, const ngraph::PartialShape& dataNodeOutputShape,
element::Type precision, element::Type precision,
const ngraph::element::Type deqPrecision, const ngraph::element::Type deqPrecision,
std::shared_ptr<ngraph::Node> input) { std::shared_ptr<ngraph::Node> input) {
@ -1767,7 +1783,9 @@ std::vector<element::Type> NetworkHelper::precisionIntersection(
bool NetworkHelper::isFQByDynamicDimension(const std::shared_ptr<opset1::FakeQuantize>& fq) { bool NetworkHelper::isFQByDynamicDimension(const std::shared_ptr<opset1::FakeQuantize>& fq) {
const auto pInputShape = fq->get_input_partial_shape(0); const auto pInputShape = fq->get_input_partial_shape(0);
auto olShape = fq->get_input_shape(3); const auto olPShape = fq->get_input_partial_shape(3);
assert(olPShape.is_static());
auto olShape = olPShape.to_shape();
if (shape_size(olShape) > 1ul) { if (shape_size(olShape) > 1ul) {
if (pInputShape.rank().is_dynamic()) { if (pInputShape.rank().is_dynamic()) {

View File

@ -78,12 +78,12 @@ bool NormalizeL2Transformation::canBeTransformed(const TransformationContext& co
const std::vector<int64_t> axesByChannels = { 1, 2, 3 }; const std::vector<int64_t> axesByChannels = { 1, 2, 3 };
std::vector<int64_t> axesValues = axes->cast_vector<int64_t>(); std::vector<int64_t> axesValues = axes->cast_vector<int64_t>();
if (!(axesValues == axesAcrossSpatial || axesValues == axesByChannels)) { if ((axesValues != axesAcrossSpatial) && (axesValues != axesByChannels)) {
return false; return false;
} }
const ngraph::Shape outputShape = scalesConst->get_output_shape(0); const Shape outputShape = scalesConst->get_shape();
const size_t size = ngraph::shape_size(outputShape); const size_t size = shape_size(outputShape);
if (size != 1ul) { if (size != 1ul) {
const auto channelsInterval = operation->get_output_partial_shape(0)[1]; const auto channelsInterval = operation->get_output_partial_shape(0)[1];
if (channelsInterval.is_dynamic() || static_cast<size_t>(channelsInterval.get_length()) != size) { if (channelsInterval.is_dynamic() || static_cast<size_t>(channelsInterval.get_length()) != size) {

View File

@ -0,0 +1,277 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision/pad.hpp"
#include <memory>
#include <ngraph/ngraph.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include "low_precision/network_helper.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::PadTransformation, "PadTransformation", 0);
PadTransformation::PadTransformation(const Params& params) : LayerTransformation(params) {
auto mul = pattern::wrap_type<opset1::Multiply>();
auto padsBegin = pattern::wrap_type<opset1::Constant>();
auto padsEnd = pattern::wrap_type<opset1::Constant>();
auto padsValue = pattern::wrap_type<opset1::Constant>();
auto matcher = pattern::wrap_type<opset1::Pad>({ mul, padsBegin, padsEnd, padsValue });
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
auto op = m.get_match_root();
if (transformation_callback(op)) {
return false;
}
return transform(*context, m);
};
auto m = std::make_shared<ngraph::pattern::Matcher>(matcher, "PadTransformation");
this->register_matcher(m, callback);
}
bool PadTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) {
if (!canBeTransformed(context, m.get_match_root())) {
return false;
}
const auto pad = as_type_ptr<opset1::Pad>(NetworkHelper::separateInStandaloneBranch(m.get_match_root()));
const auto padConstant = as_type_ptr<opset1::Constant>(pad->get_input_node_shared_ptr(3));
const auto padConstantValue = padConstant->cast_vector<float>()[0];
const auto padsBegin = pad->get_pads_begin();
const auto padsEnd = pad->get_pads_end();
const auto padMode = pad->get_pad_mode();
auto dequantization = NetworkHelper::getDequantization(pad);
if (padMode == op::PadMode::CONSTANT) {
auto bcastConstant = [&](const std::shared_ptr<opset1::Constant> &constant) {
size_t padIdx = 0;
for (size_t i = 0; i < padsBegin.size(); ++i) {
if (padsBegin[i] != 0 || padsEnd[i] != 0) {
padIdx = i;
break;
}
}
const auto inputPShape = pad->get_input_partial_shape(0);
assert(inputPShape[padIdx].is_static());
assert(inputPShape.rank().is_static());
auto bcastedShape = Shape(inputPShape.rank().get_length(), 1ul);
bcastedShape[padIdx] = inputPShape[padIdx].get_length();
const auto bCastConst = opset1::Constant::create(element::i32, Shape{bcastedShape.size()}, bcastedShape);
return as_type_ptr<opset1::Constant>(fold<opset1::Broadcast>(constant, bCastConst));
};
if (dequantization.subtract && shape_size(dequantization.subtractConstant->get_shape()) == 1ul) {
const auto broadcastedConstant = bcastConstant(dequantization.subtractConstant);
replace_node(dequantization.subtractConstant, broadcastedConstant);
dequantization.subtractConstant = broadcastedConstant;
}
if (padConstantValue != 0.f && shape_size(dequantization.multiplyConstant->get_shape()) == 1ul) {
const auto broadcastedConstant = bcastConstant(dequantization.multiplyConstant);
replace_node(dequantization.multiplyConstant, broadcastedConstant);
dequantization.multiplyConstant = broadcastedConstant;
}
}
auto foldConstantIfNecessary = [&padMode, &padsBegin, &padsEnd](
const std::shared_ptr<opset1::Constant>& constant,
const std::shared_ptr<opset1::Pad>& pad,
float padVal) {
const auto constantShape = constant->get_shape();
if (shape_size(constantShape) == 1ul) {
return NetworkHelper::toScalar(constant);
}
std::vector<size_t> padsForConstantBegin(constantShape.size(), 0ul);
std::vector<size_t> padsForConstantEnd(constantShape.size(), 0ul);
bool foldingIsNecessary = false;
// folding is necessary when dequantization and padding are applied along the same dimension
for (size_t i = 0; i < constantShape.size(); ++i) {
if (padsBegin[i] != 0ul && constantShape[i] != 1ul) {
foldingIsNecessary = true;
padsForConstantBegin[i] = padsBegin[i];
}
if (padsEnd[i] != 0ul && constantShape[i] != 1ul) {
foldingIsNecessary = true;
padsForConstantEnd[i] = padsEnd[i];
}
}
if (foldingIsNecessary) {
const auto beginConst = opset1::Constant::create(element::u32, { padsForConstantBegin.size() }, padsForConstantBegin);
const auto endConst = opset1::Constant::create(element::u32, { padsForConstantEnd.size() }, padsForConstantEnd);
const auto padValueConstant = opset1::Constant::create(constant->get_element_type(), Shape{}, { padVal });
const auto foldedConstant = fold<opset1::Pad>(constant, beginConst, endConst, padValueConstant, padMode);
return as_type_ptr<opset1::Constant>(foldedConstant);
} else {
return constant;
}
};
if (dequantization.subtract) {
const auto normalizedSubConst = NetworkHelper::normalizeDequantizationShape(dequantization.subtract);
float padValueForSub = padConstantValue;
if (padMode == op::PadMode::CONSTANT) {
padValueForSub = 0.f;
}
const auto newSubConstant = foldConstantIfNecessary(normalizedSubConst, pad, padValueForSub);
replace_node(normalizedSubConst, newSubConstant);
dequantization.subtractConstant = newSubConstant;
}
{
const auto normalizedMulConst = NetworkHelper::normalizeDequantizationShape(dequantization.multiply);
float padValueForMul = padConstantValue;
if (padMode == op::PadMode::CONSTANT) {
padValueForMul = 1.f;
}
const auto newMulConstant = foldConstantIfNecessary(normalizedMulConst, pad, padValueForMul);
replace_node(normalizedMulConst, newMulConstant);
dequantization.multiplyConstant = newMulConstant;
}
// we must convert the pad value to low precision
const auto convertedZero = opset1::Constant::create(dequantization.data.get_element_type(), Shape{}, { padConstantValue });
pad->set_argument(3, convertedZero);
moveDequantizationAfter(context, pad, dequantization, true);
return true;
}
bool PadTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const {
if (!LayerTransformation::canBeTransformedSpatialDimension(context, op)) {
return false;
}
const auto pad = as_type_ptr<opset1::Pad>(op);
if (!pad) {
return false;
}
const auto dequantization = NetworkHelper::getDequantization(op);
if (dequantization.empty()) {
return false;
}
const auto mode = pad->get_pad_mode();
if (mode == op::PadMode::CONSTANT) {
auto padAndDqByTheSameDimension = [&](const std::shared_ptr<opset1::Constant>& deqConst) {
const auto padsBegin = pad->get_pads_begin();
const auto padsEnd = pad->get_pads_end();
int beginNonZeroIdx = -1;
for (size_t i = 0; i < padsBegin.size(); ++i) {
const bool padDimensionNotUnique = (beginNonZeroIdx != -1) && (padsBegin[i] != 0);
if (padDimensionNotUnique) {
return false;
}
if (padsBegin[i] != 0) {
beginNonZeroIdx = i;
}
}
int endNonZeroIdx = -1;
for (size_t i = 0; i < padsEnd.size(); ++i) {
const bool padDimensionNotUnique = (endNonZeroIdx != -1) && (padsEnd[i] != 0);
if (padDimensionNotUnique) {
return false;
}
if (padsEnd[i] != 0) {
endNonZeroIdx = i;
}
}
if ((beginNonZeroIdx != endNonZeroIdx) && (beginNonZeroIdx != -1) && (endNonZeroIdx != -1)) {
return false;
}
const size_t paddingDimension = beginNonZeroIdx != -1 ? beginNonZeroIdx : endNonZeroIdx;
const auto padInputPShape = pad->get_input_partial_shape(0);
const auto padInputRank = padInputPShape.rank();
if (padInputRank.is_dynamic() || padInputPShape[paddingDimension].is_dynamic()) {
return false;
}
const size_t inputRankValue = padInputRank.get_length();
auto deqShape = deqConst->get_shape();
if (shape_size(deqShape) > 1ul) {
while (deqShape.size() < inputRankValue) {
deqShape.insert(deqShape.begin(), 1ul);
}
for (size_t i = 0; i < deqShape.size(); ++i) {
const bool deqAndPadDimensionsMismatched = (deqShape[i] > 1ul) && (i != paddingDimension);
if (deqAndPadDimensionsMismatched) {
return false;
}
}
}
return true;
};
if (dequantization.subtract && !padAndDqByTheSameDimension(dequantization.subtractConstant)) {
return false;
}
const auto constant = as_type_ptr<opset1::Constant>(pad->get_input_node_shared_ptr(3));
const auto constantValue = constant->cast_vector<float>()[0];
if (constantValue != 0.f && !padAndDqByTheSameDimension(dequantization.multiplyConstant)) {
return false;
}
}
if (mode == op::PadMode::REFLECT) {
auto deqShape = dequantization.multiplyConstant->get_shape();
if (shape_size(deqShape) == 1ul) {
return true;
} else {
const auto padInputRank = pad->get_input_partial_shape(0).rank();
if (padInputRank.is_dynamic()) {
return false;
}
const size_t inputRankValue = padInputRank.get_length();
while (deqShape.size() < inputRankValue) {
deqShape.insert(deqShape.begin(), 1ul);
}
const auto padsBegin = pad->get_pads_begin();
const auto padsEnd = pad->get_pads_end();
// PadTransformation with "REFLECT" mode doesn't support dequantization and padding along the same dimension
for (size_t i = 0; i < deqShape.size(); ++i) {
if (deqShape[i] != 1ul && (padsBegin[i] != 0ul || padsEnd[i] != 0ul)) {
return false;
}
}
}
}
return true;
}
bool PadTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
return true;
}
} // namespace low_precision
} // namespace pass
} // namespace ngraph
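For PadMode::CONSTANT the transformation above pads the dequantization constants themselves with the neutral value of each operation (0 for the Subtract constant, 1 for the Multiply constant) along the padded dimension, so the dequantization can be moved behind the Pad while the padded channels stay untouched by scale and shift. A standalone 1-D sketch of that constant folding (padDequantizationConstant is an illustrative name):

#include <cstddef>
#include <vector>

// Illustrative 1-D version of foldConstantIfNecessary: extend a per-channel
// dequantization constant with a neutral value on both ends of the padded axis.
std::vector<float> padDequantizationConstant(const std::vector<float>& constant,
                                             std::size_t padsBegin,
                                             std::size_t padsEnd,
                                             float neutralValue) {
    std::vector<float> result(padsBegin, neutralValue);
    result.insert(result.end(), constant.begin(), constant.end());
    result.insert(result.end(), padsEnd, neutralValue);
    return result;
}

int main() {
    const std::vector<float> perChannelScale{0.5f, 0.25f, 0.125f};
    // Multiply constant: pad with 1 so the new channels are left unscaled;
    // the Subtract constant would be padded with 0 instead.
    const auto paddedScale = padDequantizationConstant(perChannelScale, 1, 1, 1.f);
    return (paddedScale.size() == 5) ? 0 : 1;
}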

View File

@ -47,7 +47,7 @@ void reshapeDequantizationConstant(const std::shared_ptr<opset1::Reshape>& resha
auto replaceConstant = [](const std::shared_ptr<opset1::Reshape>& reshape, const std::shared_ptr<opset1::Constant>& originalConstant) { auto replaceConstant = [](const std::shared_ptr<opset1::Reshape>& reshape, const std::shared_ptr<opset1::Constant>& originalConstant) {
// reshape for element-wise constant is not required // reshape for element-wise constant is not required
auto constantShape = originalConstant->get_shape(); auto constantShape = originalConstant->get_shape();
if (shape_size(constantShape) == 1ul) { if (NetworkHelper::isScalarLike(originalConstant)) {
if (!constantShape.empty()) { if (!constantShape.empty()) {
const auto newConstant = NetworkHelper::toScalar(originalConstant); const auto newConstant = NetworkHelper::toScalar(originalConstant);
replace_node(originalConstant, newConstant); replace_node(originalConstant, newConstant);
@ -75,19 +75,28 @@ void reshapeDequantizationConstant(const std::shared_ptr<opset1::Reshape>& resha
return; return;
} }
Shape newOperationConstantBroadcastedShape = originalConstant->output(0).get_shape(); auto getBCastedConst = [](const std::shared_ptr<opset1::Constant>& constant, size_t dimensionsToBroadcast) -> std::shared_ptr<Node> {
if (dimensionsToBroadcast == 1ul) {
return constant;
}
Shape newOperationConstantBroadcastedShape = constant->get_shape();
// add dimensions to broadcast values // add dimensions to broadcast values
if (newOperationConstantBroadcastedShape.size() == 2ul) { if (newOperationConstantBroadcastedShape.size() == 2ul) {
newOperationConstantBroadcastedShape.push_back(dimensionsToBroadcast); newOperationConstantBroadcastedShape.push_back(dimensionsToBroadcast);
} else { } else {
newOperationConstantBroadcastedShape[2] = dimensionsToBroadcast; newOperationConstantBroadcastedShape[2] = dimensionsToBroadcast;
} }
const std::shared_ptr<Node> broadcastedConstant = fold<opset1::Broadcast>(
originalConstant, const auto targetShapeConstant = opset1::Constant::create(
std::make_shared<opset1::Constant>(
element::i32, element::i32,
Shape({ newOperationConstantBroadcastedShape.size() }), Shape{ newOperationConstantBroadcastedShape.size() },
newOperationConstantBroadcastedShape)); newOperationConstantBroadcastedShape);
return fold<opset1::Broadcast>(constant, targetShapeConstant);
};
const std::shared_ptr<Node> broadcastedConstant = getBCastedConst(originalConstant, dimensionsToBroadcast);
std::vector<int> newReshapeConstValues(reshapeOutputRank.get_length(), 1ul); std::vector<int> newReshapeConstValues(reshapeOutputRank.get_length(), 1ul);
newReshapeConstValues[1] = reshapeOutputPShape[1].get_length(); newReshapeConstValues[1] = reshapeOutputPShape[1].get_length();
@ -190,7 +199,7 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex
subtractShapeWithBatch.insert(subtractShapeWithBatch.begin(), 1ul); subtractShapeWithBatch.insert(subtractShapeWithBatch.begin(), 1ul);
} }
const Shape multiplyShape = dequantization.multiply == nullptr ? Shape{} : dequantization.multiply->input(1).get_shape(); const Shape multiplyShape = dequantization.multiply == nullptr ? Shape{} : dequantization.multiplyConstant->get_shape();
Shape multiplyShapeWithBatch = multiplyShape; Shape multiplyShapeWithBatch = multiplyShape;
if ((dequantization.multiply != nullptr) && if ((dequantization.multiply != nullptr) &&
(multiplyShapeWithBatch.size() > 1ul) && (multiplyShapeWithBatch.size() > 1ul) &&

View File

@ -9,6 +9,7 @@
#include <ngraph/variant.hpp> #include <ngraph/variant.hpp>
using namespace ngraph; using namespace ngraph;
using namespace ov;
template class ngraph::VariantImpl<AvgPoolPrecisionPreservedAttributePtr>; template class ngraph::VariantImpl<AvgPoolPrecisionPreservedAttributePtr>;

View File

@ -12,6 +12,7 @@
#include "low_precision/network_helper.hpp" #include "low_precision/network_helper.hpp"
using namespace ngraph; using namespace ngraph;
using namespace ov;
using namespace ngraph::pass::low_precision; using namespace ngraph::pass::low_precision;
IntervalsAlignmentAttribute::IntervalsAlignmentAttribute( IntervalsAlignmentAttribute::IntervalsAlignmentAttribute(

View File

@ -5,6 +5,7 @@
#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp" #include "low_precision/rt_info/per_tensor_quantization_attribute.hpp"
using namespace ngraph; using namespace ngraph;
using namespace ov;
template class ngraph::VariantImpl<PerTensorQuantizationAttribute>; template class ngraph::VariantImpl<PerTensorQuantizationAttribute>;
constexpr VariantTypeInfo VariantWrapper<PerTensorQuantizationAttribute>::type_info; constexpr VariantTypeInfo VariantWrapper<PerTensorQuantizationAttribute>::type_info;

View File

@ -8,6 +8,7 @@
#include <string> #include <string>
using namespace ngraph; using namespace ngraph;
using namespace ov;
PrecisionPreservedAttribute::PrecisionPreservedAttribute(const bool value) { PrecisionPreservedAttribute::PrecisionPreservedAttribute(const bool value) {
sharedValue->value = value; sharedValue->value = value;

View File

@ -14,6 +14,7 @@
#include "low_precision/network_helper.hpp" #include "low_precision/network_helper.hpp"
using namespace ngraph; using namespace ngraph;
using namespace ov;
// order defines default precision // order defines default precision
const std::vector<ngraph::element::Type> PrecisionsAttribute::defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 }; const std::vector<ngraph::element::Type> PrecisionsAttribute::defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 };

View File

@ -12,6 +12,7 @@
#include <ngraph/opsets/opset1.hpp> #include <ngraph/opsets/opset1.hpp>
#include "low_precision/network_helper.hpp" #include "low_precision/network_helper.hpp"
using namespace ov;
using namespace ngraph; using namespace ngraph;
using namespace ngraph::pass::low_precision; using namespace ngraph::pass::low_precision;

View File

@ -42,47 +42,40 @@ void transposeDequantizationConstant(std::shared_ptr<Node>& transpose) {
return; return;
} }
if (dequantization.multiply->get_input_node_ptr(1)->get_output_shape(0).size() > 1ul) {
auto transposeDeqConstant = []( auto transposeDeqConstant = [](
std::shared_ptr<Node> dequantizationConstant, const std::shared_ptr<opset1::Constant>& dequantizationConstant,
const PartialShape& transposeOutputShape, const PartialShape& transposeOutputPShape,
const std::shared_ptr<Node>& transposeConstant) -> std::shared_ptr<Node> { const std::shared_ptr<Node>& transposeConstant) -> std::shared_ptr<Node> {
const auto dequantizationShape = dequantizationConstant->get_output_shape(0); const auto constantShape = dequantizationConstant->get_shape();
if (dequantizationShape.empty() || (dequantizationShape.size() == 1ul)) { if (shape_size(constantShape) == 1ul) {
return nullptr; return NetworkHelper::toScalar(dequantizationConstant);
} }
if (dequantizationShape.size() != static_cast<size_t>(transposeOutputShape.rank().get_length())) { assert(transposeOutputPShape.rank().is_static());
dequantizationConstant = fold<opset1::Unsqueeze>( const size_t transposeOutRank = transposeOutputPShape.rank().get_length();
dequantizationConstant, if (constantShape.size() != transposeOutRank) {
std::make_shared<opset1::Constant>(element::i32, Shape{ 1 }, std::vector<size_t>{0})); const auto unsqueezeConst = opset1::Constant::create(element::i32, Shape{ 1 }, std::vector<size_t>{ 0 });
} const auto deqConstantWithBatch = fold<opset1::Unsqueeze>(dequantizationConstant, unsqueezeConst);
return fold<opset1::Transpose>(deqConstantWithBatch, transposeConstant);
} else {
return fold<opset1::Transpose>(dequantizationConstant, transposeConstant); return fold<opset1::Transpose>(dequantizationConstant, transposeConstant);
}
}; };
if (dequantization.subtract != nullptr) { if (dequantization.subtract != nullptr) {
auto constant = transposeDeqConstant( const auto constant = transposeDeqConstant(
dequantization.subtractConstant, dequantization.subtractConstant,
transpose->get_output_partial_shape(0), transpose->get_output_partial_shape(0),
transpose->get_input_node_shared_ptr(1)); transpose->get_input_node_shared_ptr(1));
if (constant != nullptr) { replace_node(dequantization.subtractConstant, constant);
replace_node(
dequantization.subtract->get_input_node_shared_ptr(1),
constant);
}
} }
if (dequantization.multiply != nullptr) { if (dequantization.multiply != nullptr) {
auto constant = transposeDeqConstant( const auto constant = transposeDeqConstant(
dequantization.multiplyConstant, dequantization.multiplyConstant,
transpose->get_output_partial_shape(0), transpose->get_output_partial_shape(0),
transpose->get_input_node_shared_ptr(1)); transpose->get_input_node_shared_ptr(1));
if (constant != nullptr) { replace_node(dequantization.multiplyConstant, constant);
replace_node(
dequantization.multiply->get_input_node_shared_ptr(1),
constant);
}
}
} }
} }
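The reworked transposeDeqConstant above unsqueezes a non-scalar dequantization constant to the transpose output rank and then applies the same permutation the Transpose applies to the data, so the per-channel axis follows the new layout. A small sketch of applying such a permutation to a constant's shape (plain vectors, permuteShape is an illustrative name):

#include <cstddef>
#include <vector>

// Permute a shape by the same axis order a Transpose applies to its input,
// e.g. NCHW -> NHWC with order {0, 2, 3, 1}.
std::vector<std::size_t> permuteShape(const std::vector<std::size_t>& shape,
                                      const std::vector<std::size_t>& order) {
    std::vector<std::size_t> result(order.size());
    for (std::size_t i = 0; i < order.size(); ++i) {
        result[i] = shape[order[i]];
    }
    return result;
}

int main() {
    // Per-channel constant already unsqueezed to rank 4: [1, C, 1, 1]
    const std::vector<std::size_t> constShape{1, 64, 1, 1};
    const std::vector<std::size_t> order{0, 2, 3, 1};         // NCHW -> NHWC
    const auto transposed = permuteShape(constShape, order);  // [1, 1, 1, 64]
    return (transposed == std::vector<std::size_t>({1, 1, 1, 64})) ? 0 : 1;
}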

View File

@ -74,14 +74,13 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
return false; return false;
} }
const std::shared_ptr<opset1::Constant> multiplyConst = as_type_ptr<opset1::Constant>(dequantization.multiply->get_input_node_shared_ptr(1)); const Shape multiplyConstShape = dequantization.multiplyConstant->get_shape();
const Shape multiplyConstShape = multiplyConst->get_output_shape(0);
if (!multiplyConstShape.empty() && (shape_size(multiplyConstShape) != 1ul)) { if (!multiplyConstShape.empty() && (shape_size(multiplyConstShape) != 1ul)) {
const size_t groupsCount = NetworkHelper::getGroupsCount(layer); const size_t groupsCount = NetworkHelper::getGroupsCount(layer);
const ngraph::PartialShape inputPShape = layer->get_input_partial_shape(0); const PartialShape inputPShape = layer->get_input_partial_shape(0);
const size_t inputChannelsInGroup = inputPShape[1].get_length() / groupsCount; const size_t inputChannelsInGroup = inputPShape[1].get_length() / groupsCount;
const std::vector<float> scales = multiplyConst->cast_vector<float>(); const std::vector<float> scales = dequantization.multiplyConstant->cast_vector<float>();
for (size_t group = 0; group < groupsCount; ++group) { for (size_t group = 0; group < groupsCount; ++group) {
for (size_t i = 0; i < inputChannelsInGroup; ++i) { for (size_t i = 0; i < inputChannelsInGroup; ++i) {
if (scales[group * inputChannelsInGroup] != scales[group * inputChannelsInGroup + i]) { if (scales[group * inputChannelsInGroup] != scales[group * inputChannelsInGroup + i]) {
@ -90,30 +89,33 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
} }
} }
const ngraph::PartialShape outputPShape = layer->get_output_partial_shape(0); const PartialShape outputPShape = layer->get_output_partial_shape(0);
const auto rank = outputPShape.rank().get_length(); const auto rank = outputPShape.rank();
if ((rank != 4) && (rank != 5)) { if (rank.is_dynamic()) {
return false;
}
const auto rankVal = rank.get_length();
if ((rankVal != 4) && (rankVal != 5)) {
return false; return false;
} }
} }
} else { } else {
const std::shared_ptr<opset1::Multiply> multiply = as_type_ptr<opset1::Multiply>(layer->input_value(0).get_node_shared_ptr()); const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(layer);
if (multiply == nullptr) { if (dequantization.multiply == nullptr) {
return false; return false;
} }
// SS takes inputs [0: data, 1: scales, 2: shifts], takes scales (index = 1) if (dequantization.multiplyConstant == nullptr) {
const std::shared_ptr<opset1::Constant> multiplyConst = as_type_ptr<opset1::Constant>(multiply->input_value(1).get_node_shared_ptr());
if (multiplyConst == nullptr) {
return false; return false;
} }
// exactly cast vector as original code has a conversion; // exactly cast vector as original code has a conversion;
// optimize cast: // optimize cast:
// two branches depending on real type of the constant? // two branches depending on real type of the constant?
const auto scalesBuffer = multiplyConst->cast_vector<float>(); const auto scalesBuffer = dequantization.multiplyConstant->cast_vector<float>();
size_t scalesBufferSize = shape_size(multiplyConst->get_output_shape(0)); size_t scalesBufferSize = shape_size(dequantization.multiplyConstant->get_shape());
for (size_t i = 1lu; i < scalesBufferSize; ++i) { for (size_t i = 1ul; i < scalesBufferSize; ++i) {
if (scalesBuffer[i - 1] != scalesBuffer[i]) { if (scalesBuffer[i - 1] != scalesBuffer[i]) {
return false; return false;
} }
@ -132,11 +134,11 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
// TODO Implement similar checks in other weightable operations // TODO Implement similar checks in other weightable operations
const std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(layer->input_value(1).get_node_shared_ptr()); const std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(layer->get_input_node_shared_ptr(1));
std::shared_ptr<opset1::FakeQuantize> fqFromWeights; std::shared_ptr<opset1::FakeQuantize> fqFromWeights;
if (reshapeFromWeights == nullptr) { if (reshapeFromWeights == nullptr) {
fqFromWeights = as_type_ptr<opset1::FakeQuantize>(layer->input_value(1).get_node_shared_ptr()); fqFromWeights = as_type_ptr<opset1::FakeQuantize>(layer->get_input_node_shared_ptr(1));
if (fqFromWeights == nullptr) { if (fqFromWeights == nullptr) {
const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(layer, 1ul); const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(layer, 1ul);
fqFromWeights = as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr()); fqFromWeights = as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
@ -154,23 +156,29 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
return false; return false;
} }
const Shape constOutputShape = fqFromWeights->get_input_node_ptr(3)->get_output_shape(0); const auto olPShape = fqFromWeights->get_input_partial_shape(3);
if (fqFromWeights->get_input_node_ptr(4)->get_output_shape(0) != constOutputShape) { const auto ohPShape = fqFromWeights->get_input_partial_shape(4);
if (olPShape.is_dynamic() || ohPShape.is_dynamic() || olPShape != ohPShape) {
return false; return false;
} }
const size_t outChannelsShapeIndex = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
if ( const auto fqOutPShape = fqFromWeights->get_output_partial_shape(0);
// expected, it's ok: return true const size_t outChannelsIdx = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
(shape_size(constOutputShape) != 1ul) && if (fqOutPShape.rank().is_dynamic() || fqOutPShape[outChannelsIdx].is_dynamic()) {
// not expected, something wrong: return false
((constOutputShape.size() <= outChannelsShapeIndex) ||
// Check if all dimensions of scale except the output channels are all ones
(shape_size(constOutputShape) != constOutputShape[outChannelsShapeIndex]) ||
((constOutputShape[outChannelsShapeIndex] != 1ul) &&
(fqFromWeights->get_output_shape(0)[outChannelsShapeIndex] != constOutputShape[outChannelsShapeIndex])))) {
return false; return false;
} }
const Shape constShape = olPShape.to_shape();
if (shape_size(constShape) != 1ul) {
const size_t constChannels = constShape[outChannelsIdx];
const size_t fqOutChannels = fqOutPShape[outChannelsIdx].get_length();
const bool constChannelsAndFqChannelsMismatched = (constChannels != 1ul) && (fqOutChannels != constChannels);
if ((constShape.size() <= outChannelsIdx) || (shape_size(constShape) != constChannels) || constChannelsAndFqChannelsMismatched) {
return false;
}
}
} else { } else {
// TODO: LPT: is it possible to share with isQuantized? // TODO: LPT: is it possible to share with isQuantized?
const FakeQuantizeDequantization dequantizationOnWeights = reshapeFromWeights == nullptr ? const FakeQuantizeDequantization dequantizationOnWeights = reshapeFromWeights == nullptr ?
@ -180,33 +188,33 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
return false; return false;
} }
const opset1::Constant* weightsData = as_type<opset1::Constant>(dequantizationOnWeights.data.get_node()); const auto weightsData = as_type_ptr<opset1::Constant>(dequantizationOnWeights.data.get_node_shared_ptr());
if (weightsData == nullptr) { if (weightsData == nullptr) {
return false; return false;
} }
const ngraph::element::Type weightsDataPrecision = weightsData->output(0).get_element_type(); const auto weightsDataPrecision = weightsData->get_element_type();
if (!DataPrecision::isSupported(weightsDataPrecision)) { if (!DataPrecision::isSupported(weightsDataPrecision)) {
return false; return false;
} }
if ((dequantizationOnWeights.subtract != nullptr) && (dequantizationOnWeights.subtractConvert != nullptr)) { if ((dequantizationOnWeights.subtract != nullptr) && (dequantizationOnWeights.subtractConvert != nullptr)) {
const auto subtractConstantType = dequantizationOnWeights.subtractConstant->output(0).get_element_type(); const auto subtractConstantType = dequantizationOnWeights.subtractConstant->get_element_type();
if (subtractConstantType != weightsDataPrecision) { if (subtractConstantType != weightsDataPrecision) {
return false; return false;
} }
} }
const size_t outChannelsShapeIndex = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul; const size_t outChannelsIdx = is_type<opset1::ConvolutionBackpropData>(layer) ? 1ul : 0ul;
if (dequantizationOnWeights.subtract) { if (dequantizationOnWeights.subtract) {
const auto subConstShape = dequantizationOnWeights.subtractConstant->get_shape(); const auto subConstShape = dequantizationOnWeights.subtractConstant->get_shape();
if (shape_size(subConstShape) > 1ul && shape_size(subConstShape) != subConstShape[outChannelsShapeIndex]) { if (shape_size(subConstShape) > 1ul && shape_size(subConstShape) != subConstShape[outChannelsIdx]) {
return false; return false;
} }
} }
if (dequantizationOnWeights.multiply) { if (dequantizationOnWeights.multiply) {
const auto mulConstShape = dequantizationOnWeights.multiplyConstant->get_shape(); const auto mulConstShape = dequantizationOnWeights.multiplyConstant->get_shape();
if (shape_size(mulConstShape) > 1ul && shape_size(mulConstShape) != mulConstShape[outChannelsShapeIndex]) { if (shape_size(mulConstShape) > 1ul && shape_size(mulConstShape) != mulConstShape[outChannelsIdx]) {
return false; return false;
} }
} }
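The subtract and multiply constant checks above boil down to one rule: a dequantization constant on the weights path must be a scalar or carry exactly one value per output channel (dimension 1 for ConvolutionBackpropData, 0 otherwise). A minimal standalone sketch of that rule, assuming plain std::vector shapes instead of the ngraph types (the helper names are illustrative only, not part of the plugin):

#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// Total number of elements in a shape (analogue of ngraph's shape_size).
static std::size_t shape_size(const std::vector<std::size_t>& shape) {
    return std::accumulate(shape.begin(), shape.end(), std::size_t{1},
                           std::multiplies<std::size_t>());
}

// A constant is acceptable on the weights path if it is a scalar or if it
// carries exactly one value per output channel of the weights.
static bool is_per_tensor_or_per_channel(const std::vector<std::size_t>& constShape,
                                         std::size_t outChannelsIdx) {
    const std::size_t total = shape_size(constShape);
    if (total == 1) {
        return true;                                 // per-tensor (scalar) constant
    }
    if (constShape.size() <= outChannelsIdx) {
        return false;                                // no output-channel dimension at all
    }
    return total == constShape[outChannelsIdx];      // all non-channel dims must be 1
}

int main() {
    // A regular convolution keeps output channels in dimension 0 of the constant.
    std::cout << is_per_tensor_or_per_channel({64, 1, 1, 1}, 0) << '\n';  // 1: per-channel
    std::cout << is_per_tensor_or_per_channel({1, 1, 1, 1}, 0) << '\n';   // 1: per-tensor
    std::cout << is_per_tensor_or_per_channel({64, 3, 1, 1}, 0) << '\n';  // 0: rejected
}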
@ -321,7 +329,7 @@ bool WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const st
} }
bool WeightableLayerTransformation::isGroup(const std::shared_ptr<Node>& layer) { bool WeightableLayerTransformation::isGroup(const std::shared_ptr<Node>& layer) {
if (!as_type_ptr<opset1::Convolution>(layer) && !as_type_ptr<opset1::GroupConvolution>(layer)) { if (!is_type<opset1::Convolution>(layer) && !is_type<opset1::GroupConvolution>(layer)) {
return false; return false;
} }
@ -341,7 +349,7 @@ bool WeightableLayerTransformation::isDepthwise(const std::shared_ptr<Node>& lay
} }
std::shared_ptr<opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node) { std::shared_ptr<opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node) {
auto fq = as_type_ptr<opset1::FakeQuantize>(node->input_value(1).get_node_shared_ptr()); auto fq = as_type_ptr<opset1::FakeQuantize>(node->get_input_node_shared_ptr(1));
// TODO: temporary workaround // TODO: temporary workaround
if (fq == nullptr) { if (fq == nullptr) {
fq = as_type_ptr<opset1::FakeQuantize>(node->get_input_node_ptr(1)->get_input_node_shared_ptr(0)); fq = as_type_ptr<opset1::FakeQuantize>(node->get_input_node_ptr(1)->get_input_node_shared_ptr(0));

View File

@ -26,8 +26,13 @@ Config::Config() {
// for the TBB code-path, additional configuration depending on the OS and CPU types // for the TBB code-path, additional configuration depending on the OS and CPU types
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
#if defined(__APPLE__) || defined(_WIN32) #if defined(__APPLE__) || defined(_WIN32)
// 'CORES' is not implemented for Win/MacOS; so the 'NUMA' is default // 'CORES' is not implemented for Win/MacOS; so 'NONE' or 'NUMA' is the default
auto numaNodes = getAvailableNUMANodes();
if (numaNodes.size() > 1) {
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NUMA; streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NUMA;
} else {
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NONE;
}
#endif #endif
if (getAvailableCoresTypes().size() > 1 /*Hybrid CPU*/) { if (getAvailableCoresTypes().size() > 1 /*Hybrid CPU*/) {
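For reference, the block above only changes the macOS/Windows default from always-NUMA to NUMA-only-on-multi-socket machines. A hedged sketch of that decision, with a stubbed NUMA query standing in for getAvailableNUMANodes(), which is not reproduced here:

#include <iostream>
#include <vector>

enum class ThreadBindingType { NONE, CORES, NUMA };

// Stand-in for the real NUMA discovery; a real build would query the OS.
static std::vector<int> available_numa_nodes() { return {0}; }

// Pick the default binding the way the patched Config constructor does on
// platforms where binding to cores is not implemented: bind per NUMA node
// only when there is more than one node, otherwise do not bind at all.
static ThreadBindingType default_binding_for_non_core_platforms() {
    return available_numa_nodes().size() > 1 ? ThreadBindingType::NUMA
                                             : ThreadBindingType::NONE;
}

int main() {
    std::cout << static_cast<int>(default_binding_for_non_core_platforms()) << '\n';
}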

View File

@ -12,8 +12,11 @@
#include "mkldnn_itt.h" #include "mkldnn_itt.h"
#include "nodes/mkldnn_memory_node.hpp" #include "nodes/mkldnn_memory_node.hpp"
#include <threading/ie_executor_manager.hpp> #include <threading/ie_executor_manager.hpp>
#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
#include <threading/ie_tbb_streams_executor.hpp>
#else
#include <threading/ie_cpu_streams_executor.hpp> #include <threading/ie_cpu_streams_executor.hpp>
#endif
#include <ie_system_conf.h> #include <ie_system_conf.h>
#include <algorithm> #include <algorithm>
#include <unordered_set> #include <unordered_set>
@ -32,6 +35,14 @@ MKLDNNExecNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap network
return std::make_shared<MKLDNNInferRequest>(networkInputs, networkOutputs, std::static_pointer_cast<MKLDNNExecNetwork>(shared_from_this())); return std::make_shared<MKLDNNInferRequest>(networkInputs, networkOutputs, std::static_pointer_cast<MKLDNNExecNetwork>(shared_from_this()));
} }
struct ImmediateSerialExecutor : public ITaskExecutor {
void run(InferenceEngine::Task task) override {
std::lock_guard<std::mutex> l{_mutex};
task();
}
std::mutex _mutex;
};
MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
const Config &cfg, const Config &cfg,
const MKLDNNExtensionManager::Ptr& extMgr, const MKLDNNExtensionManager::Ptr& extMgr,
@ -61,11 +72,20 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network,
} else { } else {
auto streamsExecutorConfig = InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg.streamExecutorConfig, isFloatModel); auto streamsExecutorConfig = InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg.streamExecutorConfig, isFloatModel);
streamsExecutorConfig._name = "CPUStreamsExecutor"; streamsExecutorConfig._name = "CPUStreamsExecutor";
_taskExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig); #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
_taskExecutor = std::make_shared<TBBStreamsExecutor>(streamsExecutorConfig);
#else
_taskExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig);
#endif
} }
if (0 != cfg.streamExecutorConfig._streams) { if (0 != cfg.streamExecutorConfig._streams) {
_callbackExecutor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor( #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
// There are no additional threads, but we still need to serialize callback execution to preserve the legacy behaviour
_callbackExecutor = std::make_shared<ImmediateSerialExecutor>();
#else
_callbackExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(
IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0, IStreamsExecutor::ThreadBindingType::NONE}); IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0, IStreamsExecutor::ThreadBindingType::NONE});
#endif
} else { } else {
_callbackExecutor = _taskExecutor; _callbackExecutor = _taskExecutor;
} }
@ -146,6 +166,19 @@ MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() {
return graphLock; return graphLock;
} }
MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() const {
int streamId = 0;
int numaNodeId = 0;
auto streamsExecutor = dynamic_cast<InferenceEngine::IStreamsExecutor*>(_taskExecutor.get());
if (nullptr != streamsExecutor) {
streamId = streamsExecutor->GetStreamId();
numaNodeId = streamsExecutor->GetNumaNodeId();
}
auto graphLock = Graph::Lock(_graphs[streamId % _graphs.size()]);
IE_ASSERT(graphLock._graph.IsReady());
return graphLock;
}
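The const GetGraph() overload added here picks a graph exactly like the non-const one: query the current stream id from the streams executor, then index the graph pool modulo its size. A simplified sketch of that lookup, with a plain struct in place of the MKLDNN graph and executor types:

#include <cstddef>
#include <deque>
#include <iostream>

struct Graph { int id = -1; };

// Map the id of the currently executing stream to one of the per-stream
// graphs; stream ids beyond the pool size wrap around.
static Graph& graph_for_stream(std::deque<Graph>& graphs, int streamId) {
    return graphs[static_cast<std::size_t>(streamId) % graphs.size()];
}

int main() {
    std::deque<Graph> graphs(4);
    for (int i = 0; i < 4; ++i) graphs[i].id = i;
    std::cout << graph_for_stream(graphs, 0).id << ' '   // 0
              << graph_for_stream(graphs, 5).id << '\n'; // 1 (5 % 4)
}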
void MKLDNNExecNetwork::setProperty(const std::map<std::string, std::string> &properties) { void MKLDNNExecNetwork::setProperty(const std::map<std::string, std::string> &properties) {
{ {
std::lock_guard<std::mutex> lock{_cfgMutex}; std::lock_guard<std::mutex> lock{_cfgMutex};
@ -171,9 +204,8 @@ InferenceEngine::CNNNetwork MKLDNNExecNetwork::GetExecGraphInfo() {
} }
Parameter MKLDNNExecNetwork::GetConfig(const std::string &name) const { Parameter MKLDNNExecNetwork::GetConfig(const std::string &name) const {
if (_graphs.size() == 0) if (_graphs.size() == 0) IE_THROW() << "No graph was found";
IE_THROW() << "No graph was found"; Config engConfig = GetGraph()._graph.getProperty();
Config engConfig = const_cast<MKLDNNExecNetwork*>(this)->GetGraph()._graph.getProperty();
auto option = engConfig._config.find(name); auto option = engConfig._config.find(name);
if (option != engConfig._config.end()) { if (option != engConfig._config.end()) {
return option->second; return option->second;
@ -187,8 +219,7 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name)
IE_THROW() << "No graph was found"; IE_THROW() << "No graph was found";
if (name == METRIC_KEY(NETWORK_NAME)) { if (name == METRIC_KEY(NETWORK_NAME)) {
IE_SET_METRIC_RETURN(NETWORK_NAME, IE_SET_METRIC_RETURN(NETWORK_NAME, GetGraph()._graph.dump().getName());
const_cast<MKLDNNExecNetwork*>(this)->GetGraph()._graph.dump().getName());
} else if (name == METRIC_KEY(SUPPORTED_METRICS)) { } else if (name == METRIC_KEY(SUPPORTED_METRICS)) {
std::vector<std::string> metrics; std::vector<std::string> metrics;
metrics.push_back(METRIC_KEY(NETWORK_NAME)); metrics.push_back(METRIC_KEY(NETWORK_NAME));
@ -198,12 +229,12 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name)
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics); IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
std::vector<std::string> configKeys; std::vector<std::string> configKeys;
for (auto && key : const_cast<MKLDNNExecNetwork*>(this)->GetGraph()._graph.getProperty()._config) { for (auto && key : GetGraph()._graph.getProperty()._config) {
configKeys.push_back(key.first); configKeys.push_back(key.first);
} }
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys); IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
} else if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) { } else if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
Config engConfig = const_cast<MKLDNNExecNetwork*>(this)->GetGraph()._graph.getProperty(); Config engConfig = GetGraph()._graph.getProperty();
auto option = engConfig._config.find(CONFIG_KEY(CPU_THROUGHPUT_STREAMS)); auto option = engConfig._config.find(CONFIG_KEY(CPU_THROUGHPUT_STREAMS));
IE_ASSERT(option != engConfig._config.end()); IE_ASSERT(option != engConfig._config.end());
auto streams = std::stoi(option->second); auto streams = std::stoi(option->second);

View File

@ -59,8 +59,9 @@ protected:
Graph& _graph; Graph& _graph;
}; };
}; };
// WARNING: Do not use _graphs directly. // WARNING: Do not use _graphs directly.
std::deque<Graph> _graphs; mutable std::deque<Graph> _graphs;
NumaNodesWeights& _numaNodesWeights; NumaNodesWeights& _numaNodesWeights;
/* WARNING: Use GetGraph() function to get access to graph in current stream. /* WARNING: Use GetGraph() function to get access to graph in current stream.
@ -68,6 +69,8 @@ protected:
* even from main thread * even from main thread
*/ */
Graph::Lock GetGraph(); Graph::Lock GetGraph();
Graph::Lock GetGraph() const;
bool CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const; bool CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const;
}; };

View File

@ -24,22 +24,20 @@
#include <transformations/common_optimizations/common_optimizations.hpp> #include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp> #include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp" #include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
#include <transformations/common_optimizations/depth_to_space_fusion.hpp>
#include <transformations/common_optimizations/softmax_fusion.hpp> #include <transformations/common_optimizations/softmax_fusion.hpp>
#include <transformations/common_optimizations/normalize_l2_fusion.hpp>
#include <transformations/op_conversions/convert_depth_to_space.hpp> #include <transformations/op_conversions/convert_depth_to_space.hpp>
#include <transformations/op_conversions/convert_shuffle_channels3.hpp> #include <transformations/op_conversions/convert_shuffle_channels3.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp> #include <transformations/op_conversions/convert_space_to_depth.hpp>
#include <transformations/op_conversions/convert_gelu.hpp> #include <transformations/op_conversions/convert_gelu.hpp>
#include <transformations/op_conversions/convert_gather_v7_to_gather_v1.hpp> #include <transformations/op_conversions/convert_gather_downgrade.hpp>
#include <transformations/op_conversions/convert_gather_v1_to_gather_v7.hpp> #include <transformations/op_conversions/convert_gather_upgrade.hpp>
#include <transformations/op_conversions/gelu7_downgrade.hpp> #include <transformations/op_conversions/gelu7_downgrade.hpp>
#include <transformations/op_conversions/hswish_decomposition.hpp> #include <transformations/op_conversions/hswish_decomposition.hpp>
#include <transformations/op_conversions/hsigmoid_decomposition.hpp> #include <transformations/op_conversions/hsigmoid_decomposition.hpp>
#include <transformations/op_conversions/mvn6_decomposition.hpp> #include <transformations/op_conversions/mvn6_decomposition.hpp>
#include <transformations/op_conversions/normalize_l2_decomposition.hpp>
#include <transformations/op_conversions/reduce_l1_decomposition.hpp> #include <transformations/op_conversions/reduce_l1_decomposition.hpp>
#include <transformations/op_conversions/reduce_l2_decomposition.hpp> #include <transformations/op_conversions/reduce_l2_decomposition.hpp>
#include <transformations/op_conversions/convert_pad_to_group_conv.hpp>
#include <transformations/op_conversions/softplus_decomposition.hpp> #include <transformations/op_conversions/softplus_decomposition.hpp>
#include <transformations/op_conversions/convert_space_to_batch.hpp> #include <transformations/op_conversions/convert_space_to_batch.hpp>
#include <transformations/op_conversions/convert_batch_to_space.hpp> #include <transformations/op_conversions/convert_batch_to_space.hpp>
@ -53,7 +51,6 @@
#include <transformations/op_conversions/gru_cell_decomposition.hpp> #include <transformations/op_conversions/gru_cell_decomposition.hpp>
#include <transformations/op_conversions/log_softmax_decomposition.hpp> #include <transformations/op_conversions/log_softmax_decomposition.hpp>
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp> #include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
#include <transformations/op_conversions/convert_shuffle_channels3.hpp>
#include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp> #include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
#include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp> #include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp>
#include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp> #include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp>
@ -249,7 +246,8 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
return false; return false;
}; };
pass_config->set_callback<ngraph::pass::ConvertRNNSequenceToTensorIterator, ngraph::pass::ConvertGRUSequenceToTensorIterator, pass_config->set_callback<ngraph::pass::ConvertRNNSequenceToTensorIterator,
ngraph::pass::ConvertGRUSequenceToTensorIterator,
ngraph::pass::ConvertLSTMSequenceToTensorIterator>( ngraph::pass::ConvertLSTMSequenceToTensorIterator>(
[isSequencePrimitiveSupported](const_node_ptr &node) -> bool { [isSequencePrimitiveSupported](const_node_ptr &node) -> bool {
return isSequencePrimitiveSupported(node); return isSequencePrimitiveSupported(node);
@ -280,18 +278,17 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
return MKLDNNMVNNode::isSupportedOperation(node, errorMessage); return MKLDNNMVNNode::isSupportedOperation(node, errorMessage);
}); });
pass_config->set_callback<ngraph::pass::NormalizeL2Decomposition>(
[](const_node_ptr &node) -> bool {
std::string errorMsg;
return MKLDNNNormalizeL2Node::isSupportedOperation(node, errorMsg);
});
pass_config->set_callback<ngraph::pass::SoftmaxFusion>( pass_config->set_callback<ngraph::pass::SoftmaxFusion>(
[](const_node_ptr &node) -> bool { [](const_node_ptr &node) -> bool {
return node->input_value(0).get_partial_shape().rank().get_length() > 5; return node->input_value(0).get_partial_shape().rank().get_length() > 5;
}); });
auto normalizeL2FusionCallback = [](const_node_ptr &node) -> bool {
std::string errorMsg;
return !MKLDNNNormalizeL2Node::isSupportedOperation(node, errorMsg);
};
pass_config->set_callback<ngraph::pass::NormalizeL2FusionWithAdd>(normalizeL2FusionCallback);
pass_config->set_callback<ngraph::pass::NormalizeL2FusionWithMax>(normalizeL2FusionCallback);
// List of enabled/disabled transformations // List of enabled/disabled transformations
pass_config->disable<ngraph::pass::ConvertGELU>(); pass_config->disable<ngraph::pass::ConvertGELU>();
pass_config->disable<ngraph::pass::ConvertShuffleChannels3>(); pass_config->disable<ngraph::pass::ConvertShuffleChannels3>();
@ -307,10 +304,11 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>(); pass_config->disable<ngraph::pass::WeightsDequantizeToFakeQuantize>();
pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>(); pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
pass_config->disable<ngraph::pass::ConvertGather7ToGather1>(); pass_config->disable<ngraph::pass::ConvertGather7ToGather1>();
pass_config->disable<ngraph::pass::ConvertDeformableConv8To1>();
pass_config->enable<ngraph::pass::NormalizeL2Decomposition>();
pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>(); pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>();
pass_config->enable<ngraph::pass::ConvertGather1ToGather7>(); pass_config->enable<ngraph::pass::ConvertGather1ToGather7>();
pass_config->enable<ngraph::pass::ConvertGather8ToGather7>();
if (useLpt) { if (useLpt) {
pass_config->set_callback<ngraph::pass::ConvertQuantizeDequantize>([](const_node_ptr &node) -> bool { pass_config->set_callback<ngraph::pass::ConvertQuantizeDequantize>([](const_node_ptr &node) -> bool {

View File

@ -741,9 +741,10 @@ private:
bool MKLDNNDeformableConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept { bool MKLDNNDeformableConvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try { try {
const auto defConvNode = ngraph::as_type_ptr<const ngraph::op::v1::DeformableConvolution>(op); if (!one_of(op->get_type_info(),
if (!defConvNode) { ngraph::op::v1::DeformableConvolution::type_info,
errorMessage = "Node is not an instance of DeformableConvolution form the operation set v1."; ngraph::op::v8::DeformableConvolution::type_info)) {
errorMessage = "Node is not an instance of DeformableConvolution form the operation set v1 or v8.";
return false; return false;
} }
} catch (...) { } catch (...) {
@ -759,28 +760,35 @@ MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const std::shar
if (!isSupportedOperation(op, errorMessage)) { if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage; IE_THROW(NotImplemented) << errorMessage;
} }
auto defConvNode = ngraph::as_type_ptr<const ngraph::op::v1::DeformableConvolution>(op); auto defConvNodeBase = std::dynamic_pointer_cast<ngraph::op::util::DeformableConvolutionBase>(op);
group = defConvNode->get_group(); group = defConvNodeBase->get_group();
deformable_group = defConvNode->get_deformable_group(); deformable_group = defConvNodeBase->get_deformable_group();
auto& strides = defConvNodeBase->get_strides();
auto& strides = defConvNode->get_strides();
for (int i = 0; i < strides.size(); i++) { for (int i = 0; i < strides.size(); i++) {
stride.push_back(strides[i]); stride.push_back(strides[i]);
} }
auto& dilations = defConvNode->get_dilations(); auto& dilations = defConvNodeBase->get_dilations();
for (int i = 1; i <= dilations.size(); i++) { for (int i = 1; i <= dilations.size(); i++) {
dilation.push_back(dilations[dilations.size() - i] - 1); dilation.push_back(dilations[dilations.size() - i] - 1);
} }
paddingL = defConvNode->get_pads_begin(); paddingL = defConvNodeBase->get_pads_begin();
if (op->get_type_info() == ngraph::op::v8::DeformableConvolution::type_info) {
auto defConvNode = std::dynamic_pointer_cast<ngraph::op::v8::DeformableConvolution>(op);
with_bilinear_pad = defConvNode->get_bilinear_interpolation_pad();
} else {
with_bilinear_pad = false;
}
enforceRef = (op->get_type_info() == ngraph::op::v8::DeformableConvolution::type_info);
} }
void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() { void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() {
std::string errorPrefix = "DeformableConvolution layer with name '" + getName() + "' "; std::string errorPrefix = "DeformableConvolution layer with name '" + getName() + "' ";
if (getParentEdges().size() != 3) if (getParentEdges().size() != 3 && getParentEdges().size() != 4)
IE_THROW() << errorPrefix << "has incorrect number of input edges"; IE_THROW() << errorPrefix << "has incorrect number of input edges";
if (getChildEdges().empty()) if (getChildEdges().empty())
IE_THROW() << errorPrefix << "has incorrect number of output edges"; IE_THROW() << errorPrefix << "has incorrect number of output edges";
@ -806,22 +814,29 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty()) if (!supportedPrimitiveDescriptors.empty())
return; return;
size_t inputsNumber = getOriginalInputsNumber();
NodeConfig config; NodeConfig config;
config.dynBatchSupport = false; config.dynBatchSupport = false;
config.inConfs.resize(3); config.inConfs.resize(inputsNumber);
config.inConfs[0].constant = false; config.inConfs[0].constant = false;
config.inConfs[0].inPlace = -1; config.inConfs[0].inPlace = -1;
config.inConfs[1].constant = false; config.inConfs[1].constant = false;
config.inConfs[1].inPlace = -1; config.inConfs[1].inPlace = -1;
config.inConfs[1].constant = false; config.inConfs[2].constant = false;
config.inConfs[1].inPlace = -1; config.inConfs[2].inPlace = -1;
if (inputsNumber > 3) {
config.inConfs[3].constant = false;
config.inConfs[3].inPlace = -1;
}
config.outConfs.resize(1); config.outConfs.resize(1);
config.outConfs[0].constant = false; config.outConfs[0].constant = false;
config.outConfs[0].inPlace = -1; config.outConfs[0].inPlace = -1;
impl_desc_type impl_type; impl_desc_type impl_type;
if (mayiuse(cpu::x64::avx512_common)) { if (enforceRef) {
impl_type = impl_desc_type::ref;
} else if (mayiuse(cpu::x64::avx512_common)) {
impl_type = impl_desc_type::jit_avx512; impl_type = impl_desc_type::jit_avx512;
} else if (mayiuse(cpu::x64::avx2)) { } else if (mayiuse(cpu::x64::avx2)) {
impl_type = impl_desc_type::jit_avx2; impl_type = impl_desc_type::jit_avx2;
@ -831,8 +846,8 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
impl_type = impl_desc_type::ref; impl_type = impl_desc_type::ref;
} }
if (mayiuse(cpu::x64::sse41)) { if (!enforceRef && mayiuse(cpu::x64::sse41)) {
// optimzed implementation // optimized implementation
auto dataFormat = memory::format_tag::nhwc; auto dataFormat = memory::format_tag::nhwc;
auto offFormat = memory::format_tag::nchw; auto offFormat = memory::format_tag::nchw;
auto weiFormat = group > 1 ? mayiuse(avx512_common) ? memory::format_tag::gOIhw16i16o : memory::format_tag::gOIhw8i8o auto weiFormat = group > 1 ? mayiuse(avx512_common) ? memory::format_tag::gOIhw16i16o : memory::format_tag::gOIhw8i8o
@ -842,8 +857,25 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
memory::data_type::f32, dataFormat); memory::data_type::f32, dataFormat);
config.inConfs[1].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(1)->getShape().getStaticDims(), config.inConfs[1].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(1)->getShape().getStaticDims(),
memory::data_type::f32, offFormat); memory::data_type::f32, offFormat);
auto& wDims = getParentEdgeAt(2)->getShape().getStaticDims();
if (group > 1 && wDims.size() != 5) {
auto new_dims = InferenceEngine::SizeVector({group, div_up(wDims[0], group)});
for (int i = 1; i < wDims.size(); i++) {
new_dims.push_back(wDims[i]);
}
config.inConfs[2].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(2)->getShape().getStaticDims(), config.inConfs[2].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(2)->getShape().getStaticDims(),
memory::data_type::f32, weiFormat); memory::data_type::f32, weiFormat);
} else {
config.inConfs[2].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(2)->getShape().getStaticDims(),
memory::data_type::f32, weiFormat);
}
if (inputsNumber > 3) {
config.inConfs[3].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(3)->getShape().getStaticDims(),
memory::data_type::f32, memory::format_tag::nchw);
}
config.outConfs[0].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getChildEdgeAt(0)->getShape().getStaticDims(), config.outConfs[0].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getChildEdgeAt(0)->getShape().getStaticDims(),
memory::data_type::f32, dataFormat); memory::data_type::f32, dataFormat);
supportedPrimitiveDescriptors.push_back({config, impl_type}); supportedPrimitiveDescriptors.push_back({config, impl_type});
@ -855,6 +887,10 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() {
memory::format_tag::nchw); memory::format_tag::nchw);
config.inConfs[2].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::f32, config.inConfs[2].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::f32,
memory::format_tag::oihw); memory::format_tag::oihw);
if (inputsNumber > 3) {
config.inConfs[3].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getParentEdgeAt(3)->getShape().getStaticDims(), memory::data_type::f32,
memory::format_tag::nchw);
}
config.outConfs[0].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getChildEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, config.outConfs[0].desc = MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(getChildEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32,
memory::format_tag::nchw); memory::format_tag::nchw);
supportedPrimitiveDescriptors.push_back({config, impl_type}); supportedPrimitiveDescriptors.push_back({config, impl_type});
@ -874,6 +910,7 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
jcp.dg = deformable_group; jcp.dg = deformable_group;
jcp.ngroups = group; jcp.ngroups = group;
jcp.mb = srcDims[0]; jcp.mb = srcDims[0];
jcp.oc = dstDims[1] / jcp.ngroups; jcp.oc = dstDims[1] / jcp.ngroups;
@ -884,9 +921,8 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
jcp.oh = dstDims[2]; jcp.oh = dstDims[2];
jcp.ow = dstDims[3]; jcp.ow = dstDims[3];
bool with_groups = group > 1; jcp.kh = weiDims[2];
jcp.kh = weiDims[with_groups + 2]; jcp.kw = weiDims[3];
jcp.kw = weiDims[with_groups + 3];
jcp.t_pad = paddingL[0]; jcp.t_pad = paddingL[0];
jcp.l_pad = paddingL[1]; jcp.l_pad = paddingL[1];
@ -898,6 +934,8 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
jcp.dilate_w = dilation[1]; jcp.dilate_w = dilation[1];
jcp.with_bias = false; jcp.with_bias = false;
jcp.with_bi_pad = with_bilinear_pad;
jcp.with_modulation = getParentEdges().size() > 3;
const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8; const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8;
jcp.ic_block = simd_w; jcp.ic_block = simd_w;
@ -910,13 +948,16 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
jcp.typesize_in = sizeof(float); jcp.typesize_in = sizeof(float);
jcp.typesize_off = sizeof(float); jcp.typesize_off = sizeof(float);
jcp.typesize_out = sizeof(float); jcp.typesize_out = sizeof(float);
jcp.typesize_modulation = sizeof(float);
jcp.ur_w = mayiuse(cpu::x64::avx512_common) ? 6 : 3; jcp.ur_w = mayiuse(cpu::x64::avx512_common) ? 6 : 3;
jcp.nb_oc_blocking = !mayiuse(cpu::x64::avx2) ? 2 : 4; jcp.nb_oc_blocking = !mayiuse(cpu::x64::avx2) ? 2 : 4;
jcp.nthr = dnnl_get_max_threads(); jcp.nthr = dnnl_get_max_threads();
if (mayiuse(cpu::x64::avx512_common)) { if (enforceRef) {
return;
} else if (mayiuse(cpu::x64::avx512_common)) {
def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx512_common>(jcp)); def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx512_common>(jcp));
} else if (mayiuse(cpu::x64::avx2)) { } else if (mayiuse(cpu::x64::avx2)) {
def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx2>(jcp)); def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx2>(jcp));
@ -930,9 +971,9 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() {
void MKLDNNDeformableConvolutionNode::executeReference(const float* src, const float* offsets, const float* weights, float* dst, void MKLDNNDeformableConvolutionNode::executeReference(const float* src, const float* offsets, const float* weights, float* dst,
const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides, const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides,
const std::vector<size_t>& wei_strides, const std::vector<size_t>& dst_strides) { const std::vector<size_t>& wei_strides, const std::vector<size_t>& dst_strides,
const float* modulation, const std::vector<size_t>& modulation_strides) {
const bool with_groups = jcp.ngroups > 1; const bool with_groups = jcp.ngroups > 1;
const int G = jcp.ngroups; const int G = jcp.ngroups;
const int MB = jcp.mb; const int MB = jcp.mb;
const int OH = jcp.oh; const int OH = jcp.oh;
@ -956,65 +997,79 @@ void MKLDNNDeformableConvolutionNode::executeReference(const float* src, const f
const int DG = jcp.dg; const int DG = jcp.dg;
const int channel_per_deformable_group = IC * G / DG; const int channel_per_deformable_group = (IC * G) / DG;
const bool with_bi_pad = jcp.with_bi_pad;
auto ker = [=](int g, int mb, int oc, int oh, int ow) { auto ker = [=](int g, int mb, int oc, int oh, int ow) {
float d = 0; float d = 0;
const int h_in = oh * KSH - padT; const int h_in = oh * KSH - padT;
const int w_in = ow * KSW - padL; const int w_in = ow * KSW - padL;
for (int ic = 0; ic < IC; ic++) { for (int ic = 0; ic < IC; ic++) {
const float *data_im_ptr = src + mb * src_strides[0] + (g * IC + ic) * src_strides[1] + h_in * src_strides[2] + w_in * src_strides[3]; const float *data_im_ptr = src + mb * src_strides[0] + (g * IC + ic) * src_strides[1];
const int deformable_group_index = ic / channel_per_deformable_group; const int deformable_group_index = (IC * g + ic) / channel_per_deformable_group;
const float *data_offset_ptr = offsets + mb * off_strides[0] + (deformable_group_index * 2 * KH * KW) * off_strides[1]; const float *data_offset_ptr = offsets + mb * off_strides[0] + (deformable_group_index * 2 * KH * KW) * off_strides[1];
const float *modulation_offset_ptr = nullptr;
if (modulation != nullptr) {
modulation_offset_ptr = modulation + mb * modulation_strides[0] + (deformable_group_index * KH * KW) * modulation_strides[1];
}
for (int kh = 0; kh < KH; kh++) { for (int kh = 0; kh < KH; kh++) {
for (int kw = 0; kw < KW; kw++) { for (int kw = 0; kw < KW; kw++) {
const size_t data_offset_h_index = 2 * (kh * KW + kw) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3]; const size_t data_offset_h_index = 2 * (kh * KW + kw) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3];
const size_t data_offset_w_index = (2 * (kh * KW + kw) + 1) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3]; const size_t data_offset_w_index = (2 * (kh * KW + kw) + 1) * off_strides[1] + oh * off_strides[2] + ow * off_strides[3];
const float offset_h = data_offset_ptr[data_offset_h_index]; const float offset_h = data_offset_ptr[data_offset_h_index];
const float offset_w = data_offset_ptr[data_offset_w_index]; const float offset_w = data_offset_ptr[data_offset_w_index];
float val = 0.0f; float map_h = h_in + kh * (KDH + 1) + offset_h;
const float h_im = h_in + kh * (KDH + 1) + offset_h; float map_w = w_in + kw * (KDW + 1) + offset_w;
const float w_im = w_in + kw * (KDW + 1) + offset_w; bool skip_compute;
if (with_bilinear_pad) {
if (h_im >= 0 && w_im >= 0 && h_im < IH && w_im < IW) { skip_compute = !(static_cast<int>(map_w) > -1 &&
float map_h = kh * (KDH + 1) + offset_h; static_cast<int>(map_w) < IW &&
float map_w = kw * (KDW + 1) + offset_w; static_cast<int>(map_h) > -1 &&
const int cur_height = IH - h_in; static_cast<int>(map_h) < IH);
const int cur_width = IW - w_in;
int h_low = static_cast<int>(floorf(map_h));
int w_low = static_cast<int>(floorf(map_w));
int h_high;
int w_high;
if (h_low >= cur_height - 1) {
h_high = h_low = cur_height - 1;
map_h = static_cast<float>(h_low);
} else { } else {
h_high = h_low + 1; skip_compute = !(map_w >= 0 &&
} map_w < IW &&
map_h >= 0 &&
if (w_low >= cur_width - 1) { map_h < IH);
w_high = w_low = cur_width - 1;
map_w = static_cast<float>(w_low);
} else {
w_high = w_low + 1;
} }
if (!skip_compute) {
const int cur_h_end = IH;
const int cur_w_end = IW;
int h_low = with_bi_pad ? static_cast<int>(floorf(map_h)) :
std::max(static_cast<int>(floorf(map_h)), 0);
int w_low = with_bi_pad ? static_cast<int>(floorf(map_w)) :
std::max(static_cast<int>(floorf(map_w)), 0);
const int cur_h_start = h_low;
const int cur_w_start = w_low;
int h_high = with_bi_pad ? h_low + 1 : std::min(static_cast<int>(ceilf(map_h)), cur_h_end - 1);
int w_high = with_bi_pad ? w_low + 1 : std::min(static_cast<int>(ceilf(map_w)), cur_w_end - 1);
float lh = map_h - h_low; float lh = map_h - h_low;
float lw = map_w - w_low; float lw = map_w - w_low;
float hh = 1 - lh, hw = 1 - lw; float hh = 1 - lh, hw = 1 - lw;
float v1 = data_im_ptr[h_low * src_strides[2] + w_low * src_strides[3]]; float v1 = (cur_w_start >= 0 && cur_h_start >= 0) ? data_im_ptr[h_low * src_strides[2] + w_low * src_strides[3]] : 0.0f;
float v2 = data_im_ptr[h_low * src_strides[2] + w_high * src_strides[3]]; float v2 = (w_high < cur_w_end && cur_h_start >= 0) ? data_im_ptr[h_low * src_strides[2] + w_high * src_strides[3]] : 0.0f;
float v3 = data_im_ptr[h_high * src_strides[2] + w_low * src_strides[3]]; float v3 = (cur_w_start >= 0 && h_high < cur_h_end) ? data_im_ptr[h_high * src_strides[2] + w_low * src_strides[3]] : 0.0f;
float v4 = data_im_ptr[h_high * src_strides[2] + w_high * src_strides[3]]; float v4 = (w_high < cur_w_end && h_high < cur_h_end) ? data_im_ptr[h_high * src_strides[2] + w_high * src_strides[3]] : 0.0f;
float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
float modulation_scalar = 1.0f;
if (modulation_offset_ptr != nullptr) {
size_t modulation_index = (kh * KW + kw) * modulation_strides[1] + oh * modulation_strides[2] + ow * modulation_strides[3];
modulation_scalar = modulation_offset_ptr[modulation_index];
}
const float weight = with_groups ? weights[(g + oc / G) * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] +
kw * wei_strides[3]]
: weights[oc * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] + kw * wei_strides[3]];
d += val * weight * modulation_scalar;
} }
d += val * (with_groups ? weights[g * wei_strides[0] + oc * wei_strides[1] + ic * wei_strides[2] + kh * wei_strides[3] +
kw * wei_strides[4]]
: weights[oc * wei_strides[0] + ic * wei_strides[1] + kh * wei_strides[2] + kw * wei_strides[3]]);
} }
} }
} }
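The reference path above is a bilinear sample of the input feature map at the fractional position (map_h, map_w); taps that fall outside the image contribute zero, and the result is optionally scaled by a modulation value before being weighted. A self-contained sketch of just the sampling step on a single-channel row-major image (simplified: it always zero-pads, ignoring the with_bilinear_pad clamping variant):

#include <cmath>
#include <iostream>
#include <vector>

// Bilinearly sample img (H x W, row-major) at fractional position (h, w).
// Neighbours outside the image contribute 0, matching the zero contribution
// the reference kernel gives to out-of-range taps.
static float bilinear_sample(const std::vector<float>& img, int H, int W,
                             float h, float w) {
    const int h_low = static_cast<int>(std::floor(h));
    const int w_low = static_cast<int>(std::floor(w));
    const int h_high = h_low + 1;
    const int w_high = w_low + 1;

    const float lh = h - h_low, lw = w - w_low;
    const float hh = 1.0f - lh, hw = 1.0f - lw;

    auto at = [&](int y, int x) -> float {
        return (y >= 0 && y < H && x >= 0 && x < W) ? img[y * W + x] : 0.0f;
    };

    const float v1 = at(h_low, w_low),  v2 = at(h_low, w_high);
    const float v3 = at(h_high, w_low), v4 = at(h_high, w_high);
    // The four neighbour weights sum to 1.
    return hh * hw * v1 + hh * lw * v2 + lh * hw * v3 + lh * lw * v4;
}

int main() {
    // 2x2 image; sampling in the middle averages all four pixels.
    std::vector<float> img = {1.0f, 2.0f, 3.0f, 4.0f};
    std::cout << bilinear_sample(img, 2, 2, 0.5f, 0.5f) << '\n';  // 2.5
}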
@ -1058,6 +1113,8 @@ void MKLDNNDeformableConvolutionNode::executeOptimized(const float* src, const f
} }
void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) {
const size_t inputsNumber = getOriginalInputsNumber();
auto &srcMemory0 = getParentEdgeAt(0)->getMemory(); auto &srcMemory0 = getParentEdgeAt(0)->getMemory();
auto &srcMemory1 = getParentEdgeAt(1)->getMemory(); auto &srcMemory1 = getParentEdgeAt(1)->getMemory();
auto &srcMemory2 = getParentEdgeAt(2)->getMemory(); auto &srcMemory2 = getParentEdgeAt(2)->getMemory();
@ -1066,8 +1123,18 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) {
const auto *src = reinterpret_cast<const float *>(srcMemory0.GetPtr()); const auto *src = reinterpret_cast<const float *>(srcMemory0.GetPtr());
const auto *offsets = reinterpret_cast<const float *>(srcMemory1.GetPtr()); const auto *offsets = reinterpret_cast<const float *>(srcMemory1.GetPtr());
const auto *weights = reinterpret_cast<const float *>(srcMemory2.GetPtr()); const auto *weights = reinterpret_cast<const float *>(srcMemory2.GetPtr());
float* modulation = nullptr;
if (inputsNumber > 3) {
modulation = reinterpret_cast<float *>(getParentEdgeAt(3)->getMemory().GetPtr());
}
float *dst = reinterpret_cast<float *>(dstMemory.GetPtr()); float *dst = reinterpret_cast<float *>(dstMemory.GetPtr());
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
IE_THROW() << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors.";
auto config = selectedPrimitiveDescriptor->getConfig();
auto src_block_desc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>(); auto src_block_desc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
std::vector<size_t> src_strides(src_block_desc.getStrides().size()); std::vector<size_t> src_strides(src_block_desc.getStrides().size());
for (int i = 0; i < src_strides.size(); i++) { for (int i = 0; i < src_strides.size(); i++) {
@ -1080,13 +1147,19 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) {
dst_strides[dst_block_desc.getOrder()[i]] = dst_block_desc.getStrides()[i]; dst_strides[dst_block_desc.getOrder()[i]] = dst_block_desc.getStrides()[i];
} }
auto off_strides = getParentEdgeAt(1)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides(); auto off_strides = getParentEdgeAt(1)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides();
auto wei_strides = getParentEdgeAt(2)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides(); auto wei_strides = getParentEdgeAt(2)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides();
InferenceEngine::SizeVector modulation_strides;
if (inputsNumber > 3) {
modulation_strides = getParentEdgeAt(3)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides();
}
if (def_conv_kernel) { if (def_conv_kernel) {
executeOptimized(src, offsets, weights, dst, src_strides, off_strides, dst_strides); executeOptimized(src, offsets, weights, dst, src_strides, off_strides, dst_strides);
} else { } else {
executeReference(src, offsets, weights, dst, src_strides, off_strides, wei_strides, dst_strides); executeReference(src, offsets, weights, dst, src_strides, off_strides, wei_strides, dst_strides, modulation, modulation_strides);
} }
} }

View File

@ -22,8 +22,6 @@ struct jit_def_conv_params {
int kd, kh, kw; int kd, kh, kw;
int stride_d, stride_h, stride_w; int stride_d, stride_h, stride_w;
int dilate_d, dilate_h, dilate_w; int dilate_d, dilate_h, dilate_w;
bool with_bias;
bool with_sum;
int nthr; int nthr;
int nb_ic, ic_block; int nb_ic, ic_block;
int nb_oc, oc_block; int nb_oc, oc_block;
@ -32,13 +30,19 @@ struct jit_def_conv_params {
int ur_w_tail; int ur_w_tail;
int typesize_in; int typesize_in;
int typesize_off; int typesize_off;
int typesize_modulation;
int typesize_bia; int typesize_bia;
int typesize_out; int typesize_out;
bool with_bias;
bool with_sum;
bool with_modulation;
bool with_bi_pad;
}; };
struct jit_def_conv_call_args { struct jit_def_conv_call_args {
const void *src; const void *src;
const void *off; const void *off;
const void *modulation;
const void *filt; const void *filt;
const void *bias; const void *bias;
const void *dst; const void *dst;
@ -75,11 +79,13 @@ public:
bool canBeInPlace() const override { bool canBeInPlace() const override {
return false; return false;
} }
bool enforceRef = false;
InferenceEngine::Precision getRuntimePrecision() const override; InferenceEngine::Precision getRuntimePrecision() const override;
private: private:
size_t group = 1; size_t group = 1;
bool with_bilinear_pad = false;
std::vector<ptrdiff_t> stride = {}; std::vector<ptrdiff_t> stride = {};
std::vector<ptrdiff_t> dilation = {}; std::vector<ptrdiff_t> dilation = {};
std::vector<ptrdiff_t> paddingL = {}; std::vector<ptrdiff_t> paddingL = {};
@ -92,10 +98,10 @@ private:
void executeReference(const float* src, const float* offsets, const float* weights, float* dst, void executeReference(const float* src, const float* offsets, const float* weights, float* dst,
const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides, const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides,
const std::vector<size_t>& wei_strides, const std::vector<size_t>& dst_strides); const std::vector<size_t>& wei_strides, const std::vector<size_t>& dst_strides,
const float* modulation = nullptr, const std::vector<size_t>& modulation_strides = {});
void executeOptimized(const float* src, const float* offsets, const float* weights, float* dst, void executeOptimized(const float* src, const float* offsets, const float* weights, float* dst,
const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides, const std::vector<size_t>& src_strides, const std::vector<size_t>& off_strides, const std::vector<size_t>& dst_strides);
const std::vector<size_t>& dst_strides);
}; };
} // namespace MKLDNNPlugin } // namespace MKLDNNPlugin

View File

@ -272,6 +272,8 @@ void MKLDNNStridedSliceNode::createPrimitive() {
auto srcOrder = srcBlockingDesc.getOrder(); auto srcOrder = srcBlockingDesc.getOrder();
params.srcDims = srcBlockingDesc.getBlockDims(); params.srcDims = srcBlockingDesc.getBlockDims();
params.dstDims = dstBlockingDesc.getBlockDims(); params.dstDims = dstBlockingDesc.getBlockDims();
params.srcMemPtr = srcMemPtr;
params.dstMemPtr = dstMemPtr;
params.dataSize = getSelectedPrimitiveDescriptor()->getConfig().inConfs[DATA_ID].desc->getPrecision().size(); params.dataSize = getSelectedPrimitiveDescriptor()->getConfig().inConfs[DATA_ID].desc->getPrecision().size();
if (params.parametersAreConstant) { if (params.parametersAreConstant) {
@ -282,8 +284,6 @@ void MKLDNNStridedSliceNode::createPrimitive() {
SizeVector newSrcDims, newDstDims; SizeVector newSrcDims, newDstDims;
dimsNormalization(newSrcDims, newDstDims); dimsNormalization(newSrcDims, newDstDims);
dimsGluing(realNDims, newSrcDims, newDstDims); dimsGluing(realNDims, newSrcDims, newDstDims);
if (params.dstDims.size() == 1 || params.nDimsForWork != 1)
indicesCalculation(); indicesCalculation();
} }
} }
@ -510,14 +510,35 @@ void MKLDNNStridedSliceNode::dimsGluing(const size_t realNDims, const SizeVector
if (params.dstDims.size() > 2) if (params.dstDims.size() > 2)
params.lastDstDim /= newDstDims[secondDim.first]; params.lastDstDim /= newDstDims[secondDim.first];
} }
// some parameter calculations for common execution
params.isOptimized = params.nDimsForWork == 1 && params.dstDims.size() > 1;
if (params.isOptimized) {
if (params.dstDims.size() == 2)
params.dstDims[1] = 1;
params.workAmount = params.dstDims[0] * params.dstDims[1];
params.srcShift = (begin[0] * params.srcStrides[0] + begin[1] * params.srcStrides[1]) * params.dataSize;
} else {
params.srcShift = stride.back() == 1 && stride.size() > 1 ?
begin[params.nDimsForWork] * params.srcStrides[params.nDimsForWork] * params.dataSize : 0;
}
} }
void MKLDNNStridedSliceNode::indicesCalculation() { void MKLDNNStridedSliceNode::indicesCalculation() {
// indices calculation before execution for the best performance // indices calculation before execution for the best performance
params.nThreads = parallel_get_max_threads();
params.srcIndices.resize(params.workAmount, 0); params.srcIndices.resize(params.workAmount, 0);
params.dstIndices.resize(params.workAmount, 0); params.dstIndices.resize(params.workAmount, 0);
// should choose a more suitable thread count
const size_t nthr = parallel_get_max_threads();
params.nThreads = nthr > params.workAmount ? params.workAmount : nthr;
if (params.isOptimized) {
indicesCalculationForOptimized();
return;
}
auto getSrcIdx = [this](const SizeVector& indexes){ auto getSrcIdx = [this](const SizeVector& indexes){
size_t srcIdx = 0; size_t srcIdx = 0;
for (int i = 0; i < params.nDimsForWork; ++i) for (int i = 0; i < params.nDimsForWork; ++i)
@ -542,11 +563,11 @@ void MKLDNNStridedSliceNode::indicesCalculation() {
if (coords[k] < params.dstDims[k]) { if (coords[k] < params.dstDims[k]) {
srcIdx += stride[k] * params.srcStrides[k] * params.dataSize; srcIdx += stride[k] * params.srcStrides[k] * params.dataSize;
break; break;
} else { }
coords[k] = 0; coords[k] = 0;
out = true; out = true;
} }
}
if (out) if (out)
srcIdx = getSrcIdx(coords); srcIdx = getSrcIdx(coords);
@ -554,6 +575,25 @@ void MKLDNNStridedSliceNode::indicesCalculation() {
}); });
} }
void MKLDNNStridedSliceNode::indicesCalculationForOptimized() {
const size_t dstIdx0 = params.dstStrides[0] * params.dataSize;
const size_t dstIdx1 = params.dstStrides[1] * params.dataSize;
const size_t srcIdx0 = stride[0] * params.srcStrides[0] * params.dataSize;
const size_t srcIdx1 = stride[1] * params.srcStrides[1] * params.dataSize;
for (size_t i0 = 0; i0 < params.dstDims[0]; i0++) {
const size_t idx = i0 * params.dstDims[1];
params.dstIndices[idx] = i0 * dstIdx0;
params.srcIndices[idx] = i0 * srcIdx0;
for (size_t i1 = 1; i1 < params.dstDims[1]; i1++) {
params.dstIndices[idx + i1] = params.dstIndices[idx] + i1 * dstIdx1;
params.srcIndices[idx + i1] = params.srcIndices[idx] + i1 * srcIdx1;
}
}
}
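indicesCalculationForOptimized precomputes a flat table of source and destination byte offsets so the execution loop degenerates to one memcpy per work item. A hedged sketch of the same precomputation for a 2-D slice, with all strides passed explicitly (names and signature are illustrative, not the node's API):

#include <cstddef>
#include <iostream>
#include <vector>

// Precompute flat src/dst byte offsets for a 2-D strided copy:
// dst[i0][i1] <- src[i0 * stride0][i1 * stride1] (tensor strides in elements).
static void precompute_offsets(std::size_t d0, std::size_t d1,
                               std::size_t srcStride0, std::size_t srcStride1,
                               std::size_t dstStride0, std::size_t dstStride1,
                               std::size_t stride0, std::size_t stride1,
                               std::size_t dataSize,
                               std::vector<std::size_t>& srcOff,
                               std::vector<std::size_t>& dstOff) {
    srcOff.resize(d0 * d1);
    dstOff.resize(d0 * d1);
    const std::size_t srcStep0 = stride0 * srcStride0 * dataSize;
    const std::size_t srcStep1 = stride1 * srcStride1 * dataSize;
    const std::size_t dstStep0 = dstStride0 * dataSize;
    const std::size_t dstStep1 = dstStride1 * dataSize;
    for (std::size_t i0 = 0; i0 < d0; ++i0) {
        const std::size_t base = i0 * d1;
        srcOff[base] = i0 * srcStep0;
        dstOff[base] = i0 * dstStep0;
        for (std::size_t i1 = 1; i1 < d1; ++i1) {
            srcOff[base + i1] = srcOff[base] + i1 * srcStep1;
            dstOff[base + i1] = dstOff[base] + i1 * dstStep1;
        }
    }
}

int main() {
    std::vector<std::size_t> s, d;
    // Take every second column of a 4x8 float tensor into a 4x4 tensor.
    precompute_offsets(4, 4, /*srcStride0=*/8, /*srcStride1=*/1,
                       /*dstStride0=*/4, /*dstStride1=*/1,
                       /*stride0=*/1, /*stride1=*/2, sizeof(float), s, d);
    std::cout << s[5] << ' ' << d[5] << '\n';  // 40 20
}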
void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) { void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) {
if (!params.parametersAreConstant) { if (!params.parametersAreConstant) {
auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims();
@ -586,42 +626,15 @@ void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) {
SizeVector newSrcDims, newDstDims; SizeVector newSrcDims, newDstDims;
dimsNormalization(newSrcDims, newDstDims); dimsNormalization(newSrcDims, newDstDims);
dimsGluing(dstDims.size(), newSrcDims, newDstDims); dimsGluing(dstDims.size(), newSrcDims, newDstDims);
if (params.dstDims.size() == 1 || params.nDimsForWork != 1)
indicesCalculation(); indicesCalculation();
} }
if (params.dstDims.size() > 1 && params.nDimsForWork == 1)
stridedSliceV();
else
stridedSlice(); stridedSlice();
} }
void MKLDNNStridedSliceNode::stridedSliceV() { inline void MKLDNNStridedSliceNode::stridedSlice() {
const uint8_t* srcData = reinterpret_cast<const uint8_t*>(this->getParentEdgeAt(DATA_ID)->getMemoryPtr()->GetPtr()) + const uint8_t* srcData = reinterpret_cast<const uint8_t*>(params.srcMemPtr->GetPtr()) + params.srcShift;
(begin[0] * params.srcStrides[0] + begin[1] * params.srcStrides[1]) * params.dataSize; uint8_t* dstData = reinterpret_cast<uint8_t*>(params.dstMemPtr->GetPtr());
uint8_t* dstData = reinterpret_cast<uint8_t*>(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
const size_t dstIdx = params.dstStrides[0] * params.dataSize;
const size_t srcIdx = stride[0] * params.srcStrides[0] * params.dataSize;
const size_t dstShift = params.dstStrides[1] * params.dataSize;
const size_t srcShift = stride[1] * params.srcStrides[1] * params.dataSize;
if (params.dstDims.size() > 2) {
parallel_for2d(params.dstDims[0], params.dstDims[1], [&](const size_t i, const size_t j) {
cpu_memcpy(&dstData[i * dstIdx + j * dstShift], &srcData[i * srcIdx + j * srcShift], params.lastDstDim);
});
} else {
parallel_for(params.dstDims[0], [&](const size_t i) {
cpu_memcpy(&dstData[i * dstIdx], &srcData[i * srcIdx], params.lastDstDim);
});
}
}
void MKLDNNStridedSliceNode::stridedSlice() {
const uint8_t* srcData = reinterpret_cast<const uint8_t*>(this->getParentEdgeAt(DATA_ID)->getMemoryPtr()->GetPtr()) +
(stride.back() == 1 && stride.size() > 1 ? begin[params.nDimsForWork] * params.srcStrides[params.nDimsForWork] * params.dataSize : 0);
uint8_t* dstData = reinterpret_cast<uint8_t*>(this->getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
parallel_nt(params.nThreads, [&](const int ithr, const int nthr) { parallel_nt(params.nThreads, [&](const int ithr, const int nthr) {
size_t start = 0, end = 0; size_t start = 0, end = 0;

View File

@ -27,14 +27,14 @@ public:
static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept; static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
private: private:
void stridedSliceV(); inline void stridedSlice();
void stridedSlice();
void addHiddenDims(const size_t nSrcDims); void addHiddenDims(const size_t nSrcDims);
void orderParametersByLayouts(); void orderParametersByLayouts();
void dimsNormalization(InferenceEngine::SizeVector& newSrcDims, InferenceEngine::SizeVector& newDstDims); void dimsNormalization(InferenceEngine::SizeVector& newSrcDims, InferenceEngine::SizeVector& newDstDims);
void dimsGluing(const size_t realNDims, const InferenceEngine::SizeVector& newSrcDims, const InferenceEngine::SizeVector& newDstDims); void dimsGluing(const size_t realNDims, const InferenceEngine::SizeVector& newSrcDims, const InferenceEngine::SizeVector& newDstDims);
void indicesCalculation(); void indicesCalculation();
void indicesCalculationForOptimized();
const size_t DATA_ID = 0; const size_t DATA_ID = 0;
const size_t BEGIN_ID = 1; const size_t BEGIN_ID = 1;
@ -56,6 +56,8 @@ private:
InferenceEngine::SizeVector strideDims; InferenceEngine::SizeVector strideDims;
struct { struct {
MKLDNNMemoryPtr srcMemPtr = nullptr;
MKLDNNMemoryPtr dstMemPtr = nullptr;
InferenceEngine::SizeVector srcDims; InferenceEngine::SizeVector srcDims;
InferenceEngine::SizeVector dstDims; InferenceEngine::SizeVector dstDims;
InferenceEngine::SizeVector srcStrides; InferenceEngine::SizeVector srcStrides;
@ -69,6 +71,8 @@ private:
size_t workAmount = 0; size_t workAmount = 0;
size_t lastDstDim = 0; size_t lastDstDim = 0;
size_t dataSize = 0; size_t dataSize = 0;
size_t srcShift = 0;
bool isOptimized = false;
bool equalDims = false; bool equalDims = false;
bool parametersAreConstant = true; bool parametersAreConstant = true;
} params; } params;

View File

@ -8,20 +8,20 @@
#include "memory_formats_attribute.hpp" #include "memory_formats_attribute.hpp"
namespace ngraph { using namespace ngraph;
using namespace ov;
template class ngraph::MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>; template class ov::MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>;
constexpr VariantTypeInfo VariantWrapper<MLKDNNInputMemoryFormats>::type_info; constexpr VariantTypeInfo VariantWrapper<MLKDNNInputMemoryFormats>::type_info;
std::string getMLKDNNInputMemoryFormats(const std::shared_ptr<ngraph::Node> & node) { std::string ngraph::getMLKDNNInputMemoryFormats(const std::shared_ptr<ngraph::Node> & node) {
return MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>::getMemoryFormats(node); return MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>::getMemoryFormats(node);
} }
template class ngraph::MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>; template class ov::MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>;
constexpr VariantTypeInfo VariantWrapper<MLKDNNOutputMemoryFormats>::type_info; constexpr VariantTypeInfo VariantWrapper<MLKDNNOutputMemoryFormats>::type_info;
std::string getMLKDNNOutputMemoryFormats(const std::shared_ptr<ngraph::Node> & node) { std::string ngraph::getMLKDNNOutputMemoryFormats(const std::shared_ptr<ngraph::Node> & node) {
return MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>::getMemoryFormats(node); return MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>::getMemoryFormats(node);
} }
} // namespace ngraph


@ -25,6 +25,25 @@ public:
std::string getMemoryFormats() const { return memory_format; } std::string getMemoryFormats() const { return memory_format; }
}; };
class MLKDNNInputMemoryFormats : public MLKDNNMemoryFormats {
public:
MLKDNNInputMemoryFormats() = default;
explicit MLKDNNInputMemoryFormats(const std::string &_memory_format) : MLKDNNMemoryFormats(_memory_format) {}
};
std::string getMLKDNNInputMemoryFormats(const std::shared_ptr<ngraph::Node>& node);
class MLKDNNOutputMemoryFormats : public MLKDNNMemoryFormats {
public:
MLKDNNOutputMemoryFormats() = default;
explicit MLKDNNOutputMemoryFormats(const std::string &_memory_format) : MLKDNNMemoryFormats(_memory_format) {}
};
std::string getMLKDNNOutputMemoryFormats(const std::shared_ptr<ngraph::Node>& node);
} // namespace ngraph
namespace ov {
template <typename MemoryFormatsType> template <typename MemoryFormatsType>
class MLKDNNMemoryFormatsHelper : public VariantImpl<MemoryFormatsType> { class MLKDNNMemoryFormatsHelper : public VariantImpl<MemoryFormatsType> {
public: public:
@ -35,7 +54,7 @@ public:
using MemoryFormatsWrapper = VariantWrapper<MemoryFormatsType>; using MemoryFormatsWrapper = VariantWrapper<MemoryFormatsType>;
if (!rtInfo.count(MemoryFormatsWrapper::type_info.name)) return ""; if (!rtInfo.count(MemoryFormatsWrapper::type_info.name)) return "";
const auto &attr = rtInfo.at(MemoryFormatsWrapper::type_info.name); const auto &attr = rtInfo.at(MemoryFormatsWrapper::type_info.name);
MemoryFormatsType mem_format = as_type_ptr<MemoryFormatsWrapper>(attr)->get(); MemoryFormatsType mem_format = ngraph::as_type_ptr<MemoryFormatsWrapper>(attr)->get();
return mem_format.getMemoryFormats(); return mem_format.getMemoryFormats();
} }
@ -48,7 +67,7 @@ public:
} }
if (unique_mem_format.size() > 1) { if (unique_mem_format.size() > 1) {
throw ngraph_error(std::string(VariantWrapper<MemoryFormatsType>::type_info.name) + " no rule defined for multiple values."); throw ngraph::ngraph_error(std::string(VariantWrapper<MemoryFormatsType>::type_info.name) + " no rule defined for multiple values.");
} }
std::string final_mem_format; std::string final_mem_format;
@ -59,46 +78,29 @@ public:
} }
std::shared_ptr<ngraph::Variant> init(const std::shared_ptr<ngraph::Node> & node) override { std::shared_ptr<ngraph::Variant> init(const std::shared_ptr<ngraph::Node> & node) override {
throw ngraph_error(std::string(VariantWrapper<MemoryFormatsType>::type_info.name) + " has no default initialization."); throw ngraph::ngraph_error(std::string(VariantWrapper<MemoryFormatsType>::type_info.name) + " has no default initialization.");
} }
}; };
extern template class MLKDNNMemoryFormatsHelper<ngraph::MLKDNNInputMemoryFormats>;
class MLKDNNInputMemoryFormats : public MLKDNNMemoryFormats {
public:
MLKDNNInputMemoryFormats() = default;
explicit MLKDNNInputMemoryFormats(const std::string &_memory_format) : MLKDNNMemoryFormats(_memory_format) {}
};
extern template class MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>;
template<> template<>
class VariantWrapper<MLKDNNInputMemoryFormats> : public MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats> { class VariantWrapper<ngraph::MLKDNNInputMemoryFormats> : public MLKDNNMemoryFormatsHelper<ngraph::MLKDNNInputMemoryFormats> {
public: public:
static constexpr VariantTypeInfo type_info{MLKDNNInputMemoryFormatsAttr, 0}; static constexpr VariantTypeInfo type_info{ngraph::MLKDNNInputMemoryFormatsAttr, 0};
const VariantTypeInfo &get_type_info() const override { return type_info; } const VariantTypeInfo &get_type_info() const override { return type_info; }
VariantWrapper(const MLKDNNInputMemoryFormats &value) : MLKDNNMemoryFormatsHelper<MLKDNNInputMemoryFormats>(value) {} VariantWrapper(const ngraph::MLKDNNInputMemoryFormats &value) : MLKDNNMemoryFormatsHelper<ngraph::MLKDNNInputMemoryFormats>(value) {}
}; };
std::string getMLKDNNInputMemoryFormats(const std::shared_ptr<ngraph::Node>& node); extern template class MLKDNNMemoryFormatsHelper<ngraph::MLKDNNOutputMemoryFormats>;
class MLKDNNOutputMemoryFormats : public MLKDNNMemoryFormats {
public:
MLKDNNOutputMemoryFormats() = default;
explicit MLKDNNOutputMemoryFormats(const std::string &_memory_format) : MLKDNNMemoryFormats(_memory_format) {}
};
extern template class MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>;
template<> template<>
class VariantWrapper<MLKDNNOutputMemoryFormats> : public MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats> { class VariantWrapper<ngraph::MLKDNNOutputMemoryFormats> : public MLKDNNMemoryFormatsHelper<ngraph::MLKDNNOutputMemoryFormats> {
public: public:
static constexpr VariantTypeInfo type_info{MLKDNNOutputMemoryFormatsAttr, 0}; static constexpr VariantTypeInfo type_info{ngraph::MLKDNNOutputMemoryFormatsAttr, 0};
const VariantTypeInfo &get_type_info() const override { return type_info; } const VariantTypeInfo &get_type_info() const override { return type_info; }
VariantWrapper(const MLKDNNOutputMemoryFormats &value) : MLKDNNMemoryFormatsHelper<MLKDNNOutputMemoryFormats>(value) {} VariantWrapper(const ngraph::MLKDNNOutputMemoryFormats &value) : MLKDNNMemoryFormatsHelper<ngraph::MLKDNNOutputMemoryFormats>(value) {}
}; };
std::string getMLKDNNOutputMemoryFormats(const std::shared_ptr<ngraph::Node>& node); } // namespace ov
} // namespace ngraph
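The getters above only read hints that a pass has previously stored in a node's rt_info under the wrapper's type_info key. A rough usage sketch, where node is an assumed std::shared_ptr<ngraph::Node> and the "nChw16c" layout string is purely illustrative:

auto& rtInfo = node->get_rt_info();
rtInfo[ov::VariantWrapper<ngraph::MLKDNNInputMemoryFormats>::type_info.name] =
    std::make_shared<ov::VariantWrapper<ngraph::MLKDNNInputMemoryFormats>>(
        ngraph::MLKDNNInputMemoryFormats("nChw16c"));

// later, e.g. inside the MKLDNN plugin:
std::string inputFormats = ngraph::getMLKDNNInputMemoryFormats(node);   // "nChw16c"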


@ -12,7 +12,7 @@ ie_add_plugin(NAME ${TARGET_NAME}
SOURCES ${SOURCES} ${HEADERS} SOURCES ${SOURCES} ${HEADERS}
VERSION_DEFINES_FOR multi_device_plugin.cpp) VERSION_DEFINES_FOR multi_device_plugin.cpp)
target_link_libraries(${TARGET_NAME} PRIVATE inference_engine) target_link_libraries(${TARGET_NAME} PRIVATE inference_engine ngraph inference_engine_transformations)
set_ie_threading_interface_for(${TARGET_NAME}) set_ie_threading_interface_for(${TARGET_NAME})


@ -10,6 +10,10 @@
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include <ngraph/opsets/opset1.hpp>
#include <transformations/utils/utils.hpp>
#include "ngraph_ops/convolution_ie.hpp"
#include "ngraph_ops/deconvolution_ie.hpp"
#include <ie_metric_helpers.hpp> #include <ie_metric_helpers.hpp>
#include <threading/ie_executor_manager.hpp> #include <threading/ie_executor_manager.hpp>
@ -21,6 +25,30 @@
namespace MultiDevicePlugin { namespace MultiDevicePlugin {
using namespace InferenceEngine; using namespace InferenceEngine;
namespace { namespace {
std::string GetNetworkPrecision(const InferenceEngine::CNNNetwork &network) {
auto nGraphFunc = network.getFunction();
bool isINTModel = ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc);
if (isINTModel) {
return METRIC_VALUE(INT8);
}
for (auto & node : nGraphFunc->get_ordered_ops()) {
if (std::dynamic_pointer_cast<ngraph::opset1::Convolution>(node) ||
std::dynamic_pointer_cast<ngraph::opset1::GroupConvolution>(node) ||
std::dynamic_pointer_cast<ngraph::opset1::GroupConvolutionBackpropData>(node) ||
std::dynamic_pointer_cast<ngraph::opset1::ConvolutionBackpropData>(node) ||
std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node) ||
std::dynamic_pointer_cast<ngraph::op::DeconvolutionIE>(node)) {
auto layerType = node->input(1).get_element_type().get_type_name();
if (layerType == "f32")
return METRIC_VALUE(FP32);
if (layerType == "f16")
return METRIC_VALUE(FP16);
}
}
return METRIC_VALUE(FP32);
}
std::map<std::string, std::string> mergeConfigs(std::map<std::string, std::string> config, std::map<std::string, std::string> mergeConfigs(std::map<std::string, std::string> config,
const std::map<std::string, std::string> & local) { const std::map<std::string, std::string> & local) {
for (auto && kvp : local) { for (auto && kvp : local) {
@ -28,7 +56,10 @@ namespace {
} }
return config; return config;
} }
std::vector<std::string> supported_configKeys = {MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES}; std::vector<std::string> supported_configKeys = {
MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES,
CONFIG_KEY_INTERNAL(WORK_MODE)
};
} // namespace } // namespace
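GetNetworkPrecision above reduces a model to a single tag: INT8 when a FakeQuantize is present, otherwise the weight element type of the first convolution-like op, with FP32 as the fallback. A short sketch of how that tag feeds the device choice; network is an assumed CNNNetwork already verified to carry an ngraph function:

std::string networkPrecision = GetNetworkPrecision(network);   // "INT8", "FP16" or "FP32"
if (networkPrecision == METRIC_VALUE(INT8)) {
    // quantized model: SelectDevice will look for INT8 in OPTIMIZATION_CAPABILITIES
}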
std::map<std::string, std::string> MultiDeviceInferencePlugin::GetSupportedConfig( std::map<std::string, std::string> MultiDeviceInferencePlugin::GetSupportedConfig(
@ -98,8 +129,8 @@ std::vector<DeviceInformation> MultiDeviceInferencePlugin::ParseMetaDevices(cons
InferenceEngine::Parameter MultiDeviceInferencePlugin::GetConfig(const std::string& name, InferenceEngine::Parameter MultiDeviceInferencePlugin::GetConfig(const std::string& name,
const std::map<std::string, InferenceEngine::Parameter> & options) const { const std::map<std::string, InferenceEngine::Parameter> & options) const {
if (name == MULTI_CONFIG_KEY(DEVICE_PRIORITIES)) { if (supported_configKeys.end() != std::find(supported_configKeys.begin(), supported_configKeys.end(), name)) {
auto it = _config.find(MULTI_CONFIG_KEY(DEVICE_PRIORITIES)); auto it = _config.find(name);
if (it == _config.end()) { if (it == _config.end()) {
IE_THROW() << "Value for KEY_MULTI_DEVICE_PRIORITIES is not set"; IE_THROW() << "Value for KEY_MULTI_DEVICE_PRIORITIES is not set";
} else { } else {
@ -148,17 +179,23 @@ InferenceEngine::Parameter MultiDeviceInferencePlugin::GetMetric(const std::stri
// Is called only when caching is enabled // Is called only when caching is enabled
IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetwork(const std::string& modelPath, IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetwork(const std::string& modelPath,
const std::map<std::string, std::string>& config) { const std::map<std::string, std::string>& config) {
return LoadExeNetworkImpl(modelPath, {}, config); return LoadNetworkImpl(modelPath, {}, config);
} }
IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(const CNNNetwork &network, IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(const CNNNetwork &network,
const std::map<std::string, std::string>& config) { const std::map<std::string, std::string>& config) {
return LoadExeNetworkImpl({}, network, config); if (network.getFunction() == nullptr) {
IE_THROW() << "MULTI device supports just ngraph network representation";
}
auto networkPrecision = GetNetworkPrecision(network);
return LoadNetworkImpl({}, network, config, networkPrecision);
} }
IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(const std::string& modelPath, IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetworkImpl(const std::string& modelPath,
CNNNetwork network, CNNNetwork network,
const std::map<std::string, std::string>& config) { const std::map<std::string, std::string>& config,
const std::string &networkPrecision) {
if (GetCore() == nullptr) { if (GetCore() == nullptr) {
IE_THROW() << "Please, work with MULTI device via InferenceEngine::Core object"; IE_THROW() << "Please, work with MULTI device via InferenceEngine::Core object";
} }
@ -168,16 +205,39 @@ IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadExeNetworkImpl(c
} }
auto fullConfig = mergeConfigs(_config, config); auto fullConfig = mergeConfigs(_config, config);
auto priorities = fullConfig.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
if (priorities == fullConfig.end()) {
IE_THROW() << "KEY_MULTI_DEVICE_PRIORITIES key is not set for MULTI device";
}
auto metaDevices = ParseMetaDevices(priorities->second, fullConfig);
// collect the settings that are applicable to the devices we are loading the network to // collect the settings that are applicable to the devices we are loading the network to
std::unordered_map<std::string, InferenceEngine::Parameter> multiNetworkConfig; std::unordered_map<std::string, InferenceEngine::Parameter> multiNetworkConfig;
std::vector<DeviceInformation> metaDevices;
auto workMode = fullConfig.find(CONFIG_KEY_INTERNAL(WORK_MODE));
auto priorities = fullConfig.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
// no device priorities were provided, which corresponds to the -d AUTO use case
if (priorities == fullConfig.end()) {
if (workMode != fullConfig.end()) {
std::string allDevices;
auto availableDevices = GetCore()->GetAvailableDevices();
if (availableDevices.empty()) {
IE_THROW(NotFound) << "No available device found";
}
for (auto&& device : availableDevices) {
allDevices += device;
allDevices += ((device == availableDevices[availableDevices.size()-1]) ? "" : ",");
}
metaDevices = ParseMetaDevices(allDevices, fullConfig);
multiNetworkConfig.insert({MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, allDevices});
} else {
IE_THROW() << "KEY_MULTI_DEVICE_PRIORITIES key is not set for MULTI device";
}
} else { // for use case -d MULTI:xPU or -d AUTO:xPU
metaDevices = ParseMetaDevices(priorities->second, fullConfig);
multiNetworkConfig.insert(*priorities); multiNetworkConfig.insert(*priorities);
}
// check if it is -d AUTO or -d AUTO:xPU use case
if (workMode != fullConfig.end()) {
auto targetDevice = SelectDevice(metaDevices, networkPrecision);
// std::cout << "!!! DEBUG: select device is " << targetDevice.deviceName << std::endl;
metaDevices = { targetDevice };
}
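The loop above that assembles allDevices joins names with a comma via a ternary on the last element. A hypothetical helper, not part of this change, that expresses the same join more directly:

#include <string>
#include <vector>

static std::string JoinDeviceNames(const std::vector<std::string>& devices) {
    std::string joined;
    for (size_t i = 0; i < devices.size(); ++i) {
        joined += devices[i];
        if (i + 1 < devices.size())
            joined += ",";   // separator between neighbours, none after the last name
    }
    return joined;
}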
DeviceMap<SoExecutableNetworkInternal> executableNetworkPerDevice; DeviceMap<SoExecutableNetworkInternal> executableNetworkPerDevice;
std::mutex load_mutex; std::mutex load_mutex;
@ -275,4 +335,125 @@ QueryNetworkResult MultiDeviceInferencePlugin::QueryNetwork(const CNNNetwork&
return queryResult; return queryResult;
} }
DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision) {
if (metaDevices.empty()) {
IE_THROW(NotFound) << "No available device to select in AUTO plugin";
}
if (metaDevices.size() == 1) {
return metaDevices.at(0);
}
std::vector<DeviceInformation> CPU;
std::vector<DeviceInformation> dGPU;
std::vector<DeviceInformation> iGPU;
std::vector<DeviceInformation> MYRIAD;
std::vector<DeviceInformation> VPUX;
for (auto& item : metaDevices) {
if (item.deviceName.find("CPU") == 0) {
CPU.push_back(item);
continue;
}
if (item.deviceName.find("MYRIAD") == 0) {
MYRIAD.push_back(item);
continue;
}
if (item.deviceName.find("VPUX") == 0) {
VPUX.push_back(item);
continue;
}
if (item.deviceName.find("GPU") == 0) {
auto gpuFullDeviceName = GetCore()->GetMetric(item.deviceName, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
if (gpuFullDeviceName.find("iGPU") != std::string::npos) {
iGPU.push_back(item);
} else if (gpuFullDeviceName.find("dGPU") != std::string::npos) {
dGPU.push_back(item);
}
continue;
}
}
if (CPU.empty() && dGPU.empty() && iGPU.empty() && MYRIAD.empty() && VPUX.empty()) {
IE_THROW(NotFound) << "No available device found";
}
// Priority of selecting device: dGPU > VPUX > iGPU > MYRIAD > CPU
if (!dGPU.empty()) {
for (auto&& item : dGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!VPUX.empty()) {
for (auto&& item : VPUX) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!iGPU.empty()) {
for (auto&& item : iGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!MYRIAD.empty()) {
for (auto&& item : MYRIAD) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
if (supportNetwork != capability.end()) {
return item;
}
}
}
// If the network is FP32 but no device supports FP32, offload the FP32 network to a device that supports FP16.
if (networkPrecision == "FP32") {
if (!dGPU.empty()) {
for (auto&& item : dGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!VPUX.empty()) {
for (auto&& item : VPUX) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!iGPU.empty()) {
for (auto&& item : iGPU) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
} else if (!MYRIAD.empty()) {
for (auto&& item : MYRIAD) {
std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
if (supportNetwork != capability.end()) {
return item;
}
}
}
}
if (CPU.empty()) {
IE_THROW() << "Cannot select any device";
}
return CPU[0];
}
} // namespace MultiDevicePlugin } // namespace MultiDevicePlugin
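SelectDevice above repeats the same capability probe for every device class before falling through the dGPU > VPUX > iGPU > MYRIAD > CPU priority chain. A hypothetical helper, not introduced by this change, that the branches could share; it mirrors the GetMetric calls already used in the function:

// Returns the first candidate whose OPTIMIZATION_CAPABILITIES metric lists `precision`,
// or nullptr when none of them does.
const DeviceInformation* FindDeviceSupporting(const std::vector<DeviceInformation>& candidates,
                                              const std::string& precision) {
    for (const auto& item : candidates) {
        std::vector<std::string> capability =
            GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
        if (std::find(capability.begin(), capability.end(), precision) != capability.end())
            return &item;
    }
    return nullptr;
}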


@ -41,9 +41,11 @@ protected:
const MultiDevicePlugin::DeviceName & deviceName) const; const MultiDevicePlugin::DeviceName & deviceName) const;
private: private:
InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const std::string& modelPath, InferenceEngine::IExecutableNetworkInternal::Ptr LoadNetworkImpl(const std::string& modelPath,
InferenceEngine::CNNNetwork network, InferenceEngine::CNNNetwork network,
const std::map<std::string, std::string>& config); const std::map<std::string, std::string>& config,
const std::string &networkPrecision = METRIC_VALUE(FP32));
DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32));
}; };
} // namespace MultiDevicePlugin } // namespace MultiDevicePlugin


@ -192,24 +192,6 @@ private:
std::ostream & operator<< (std::ostream & out, const Mask & mask); std::ostream & operator<< (std::ostream & out, const Mask & mask);
extern template class VariantImpl<Mask::Ptr>;
template<>
class VariantWrapper<Mask::Ptr> : public VariantImpl<Mask::Ptr> {
public:
static constexpr VariantTypeInfo type_info{"Variant::RuntimeAttribute::Mask", 0};
const VariantTypeInfo &get_type_info() const override {
return type_info;
}
static std::shared_ptr<VariantWrapper<Mask::Ptr>> create(const value_type & value) {
return std::make_shared<VariantWrapper<Mask::Ptr>>(value);
}
explicit VariantWrapper(const value_type &value) : VariantImpl<value_type>(value) {}
};
Mask::Ptr getMask(const Output<const Node> & output); Mask::Ptr getMask(const Output<const Node> & output);
Mask::Ptr getMask(const Output<Node> & output); Mask::Ptr getMask(const Output<Node> & output);
@ -217,3 +199,25 @@ Mask::Ptr getMask(const Output<Node> & output);
void setMask(Output<Node> output, const Mask::Ptr & mask); void setMask(Output<Node> output, const Mask::Ptr & mask);
} // namespace ngraph } // namespace ngraph
namespace ov {
extern template class VariantImpl<ngraph::Mask::Ptr>;
template<>
class VariantWrapper<ngraph::Mask::Ptr> : public VariantImpl<ngraph::Mask::Ptr> {
public:
static constexpr VariantTypeInfo type_info{"Variant::RuntimeAttribute::Mask", 0};
const VariantTypeInfo &get_type_info() const override {
return type_info;
}
static std::shared_ptr<VariantWrapper<ngraph::Mask::Ptr>> create(const value_type & value) {
return std::make_shared<VariantWrapper<ngraph::Mask::Ptr>>(value);
}
explicit VariantWrapper(const value_type &value) : VariantImpl<value_type>(value) {}
};
} // namespace ov
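For context, the attribute wrapped above is consumed through the getMask/setMask helpers that remain in the ngraph namespace. A brief sketch, where conv is an assumed std::shared_ptr<ngraph::Node> inside a pruning pass:

ngraph::Mask::Ptr mask = ngraph::getMask(conv->output(0));
if (mask) {
    // the mask lists, per output dimension, the indices a pruning pass may remove
    ngraph::setMask(conv->output(0), mask);   // store the (possibly updated) mask back into rt_info
}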


@ -35,6 +35,8 @@
#include <transformations/common_optimizations/conv_mul_fusion.hpp> #include <transformations/common_optimizations/conv_mul_fusion.hpp>
#include <transformations/common_optimizations/nop_elimination.hpp> #include <transformations/common_optimizations/nop_elimination.hpp>
#include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp> #include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>
#include <transformations/common_optimizations/leaky_relu_fusion.hpp>
#include <transformations/common_optimizations/normalize_l2_fusion.hpp>
NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0);
@ -79,11 +81,13 @@ bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr<ngraph::F
common_fusions->add_matcher<ngraph::pass::SwishFusion>(); common_fusions->add_matcher<ngraph::pass::SwishFusion>();
common_fusions->add_matcher<ngraph::pass::HSwishFusion>(); common_fusions->add_matcher<ngraph::pass::HSwishFusion>();
common_fusions->add_matcher<ngraph::pass::HSigmoidFusion>(); common_fusions->add_matcher<ngraph::pass::HSigmoidFusion>();
common_fusions->add_matcher<ngraph::pass::NormalizeL2Fusion>();
common_fusions->add_matcher<ngraph::pass::ClampFusion>(); common_fusions->add_matcher<ngraph::pass::ClampFusion>();
common_fusions->add_matcher<ngraph::pass::PadFusion>(); common_fusions->add_matcher<ngraph::pass::PadFusion>();
common_fusions->add_matcher<ngraph::pass::MVNFusion>(); common_fusions->add_matcher<ngraph::pass::MVNFusion>();
common_fusions->add_matcher<ngraph::pass::DilatedConvolutionConverter>(); common_fusions->add_matcher<ngraph::pass::DilatedConvolutionConverter>();
common_fusions->add_matcher<ngraph::pass::GeluFusion>(); common_fusions->add_matcher<ngraph::pass::GeluFusion>();
common_fusions->add_matcher<ngraph::pass::LeakyReluFusion>();
common_fusions->set_name("ngraph::pass::CommonFusions"); common_fusions->set_name("ngraph::pass::CommonFusions");
manager.register_pass<ngraph::pass::BinarizeWeights>(); manager.register_pass<ngraph::pass::BinarizeWeights>();
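Outside of MOCTransformations, the two newly added fusions can be exercised on their own. A minimal sketch over an arbitrary function f, mirroring the registration calls above:

#include <ngraph/pass/manager.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <transformations/common_optimizations/leaky_relu_fusion.hpp>
#include <transformations/common_optimizations/normalize_l2_fusion.hpp>

void RunNewFusions(const std::shared_ptr<ngraph::Function>& f) {
    ngraph::pass::Manager manager;
    auto fusions = manager.register_pass<ngraph::pass::GraphRewrite>();
    fusions->add_matcher<ngraph::pass::NormalizeL2Fusion>();
    fusions->add_matcher<ngraph::pass::LeakyReluFusion>();
    manager.run_passes(f);   // applies both fusions to the function
}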


@ -12,10 +12,6 @@
namespace ngraph { namespace ngraph {
template class ngraph::VariantImpl<Mask::Ptr>;
constexpr VariantTypeInfo VariantWrapper<Mask::Ptr>::type_info;
Mask::Ptr getMask(const Output<const Node> & output) { Mask::Ptr getMask(const Output<const Node> & output) {
auto &rtInfo = output.get_rt_info(); auto &rtInfo = output.get_rt_info();
using MaskWrapper = VariantWrapper<Mask::Ptr>; using MaskWrapper = VariantWrapper<Mask::Ptr>;
@ -57,6 +53,12 @@ std::ostream & operator<< (std::ostream & out, const Mask & mask) {
return out; return out;
} }
} // namespace ngraph } // namespace ngraph
namespace ov {
template class ngraph::VariantImpl<ngraph::Mask::Ptr>;
constexpr VariantTypeInfo VariantWrapper<ngraph::Mask::Ptr>::type_info;
} // namespace ov
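The pairing above follows the usual extern-template pattern: the header promises the instantiation and exactly one translation unit provides it, keeping the template out of every includer's object file. In simplified, illustrative form (the actual diff spells the instantiation through the ngraph alias):

// header (declaration only):
namespace ov {
extern template class VariantImpl<ngraph::Mask::Ptr>;
}   // namespace ov

// one .cpp (the single definition the linker uses):
namespace ov {
template class VariantImpl<ngraph::Mask::Ptr>;
}   // namespace ov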


@ -45,6 +45,15 @@ DECLARE_CONFIG_KEY(CPU_THREADS_PER_STREAM);
*/ */
DECLARE_CONFIG_KEY(FORCE_DISABLE_CACHE); DECLARE_CONFIG_KEY(FORCE_DISABLE_CACHE);
/**
* @brief The internal option name for setting the work mode of the MULTI device plugin.
*
* This option should only be used with one of the following values:
* PluginConfigInternalParams::MULTI_MODE_AUTO or PluginConfigInternalParams::MULTI_MODE_LEGACY
*/
DECLARE_CONFIG_KEY(WORK_MODE);
DECLARE_CONFIG_VALUE(MULTI_MODE_AUTO);
} // namespace PluginConfigInternalParams } // namespace PluginConfigInternalParams
} // namespace InferenceEngine } // namespace InferenceEngine
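A rough sketch of how the new key could be exercised when loading through the MULTI plugin; whether application code is expected to set this internal key directly is an assumption here, and "model.xml" is a placeholder path:

InferenceEngine::Core core;
auto network = core.ReadNetwork("model.xml");
std::map<std::string, std::string> config = {
    {CONFIG_KEY_INTERNAL(WORK_MODE), InferenceEngine::PluginConfigInternalParams::MULTI_MODE_AUTO}};
auto exeNetwork = core.LoadNetwork(network, "MULTI", config);   // MULTI then behaves as AUTO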


@ -0,0 +1,33 @@
// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <string>
#include "ie_api.h"
#include "ie_parallel.hpp"
#include "threading/ie_istreams_executor.hpp"
namespace InferenceEngine {
/**
* @class TBBStreamsExecutor
* @brief CPU Streams executor implementation that uses the TBB thread pool to run tasks
*/
class INFERENCE_ENGINE_API_CLASS(TBBStreamsExecutor) : public IStreamsExecutor {
public:
using Ptr = std::shared_ptr<TBBStreamsExecutor>;
explicit TBBStreamsExecutor(const Config& config = {});
~TBBStreamsExecutor() override;
void run(Task task) override;
void Execute(Task task) override;
int GetStreamId() override;
int GetNumaNodeId() override;
private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
} // namespace InferenceEngine
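A minimal usage sketch for the new executor; the header path and the stream count are assumptions for illustration, and Task is the usual std::function<void()> alias:

#include <threading/ie_tbb_streams_executor.hpp>   // assumed install path of the header above

int main() {
    InferenceEngine::IStreamsExecutor::Config config{"TBBStreams", /*streams*/ 2};
    InferenceEngine::TBBStreamsExecutor executor{config};
    executor.run([] {
        // the task body runs on one of the TBB worker threads managed by the executor
    });
    return 0;   // the executor joins its streams on destruction
}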
