diff --git a/.ci/azure/ci_utils/onnxruntime/version b/.ci/azure/ci_utils/onnxruntime/version index 3abd49542da..27e0d15e9f8 100644 --- a/.ci/azure/ci_utils/onnxruntime/version +++ b/.ci/azure/ci_utils/onnxruntime/version @@ -1 +1 @@ -rel-1.7.1 +rel-1.8.1 diff --git a/.ci/azure/linux.yml b/.ci/azure/linux.yml index 52c8bbd3840..2b9dda46708 100644 --- a/.ci/azure/linux.yml +++ b/.ci/azure/linux.yml @@ -16,13 +16,12 @@ jobs: timeoutInMinutes: 90 pool: - name: LIN_VMSS_VENV_F16S_WU2 + name: LIN_VMSS_VENV_F16S_U20_WU2 variables: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 16 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)/../openvino_contrib @@ -43,6 +42,7 @@ jobs: echo Python info ; which python ; python --version echo Java info ; which java ; java -version echo gcc info ; which gcc ; gcc --version + echo cmake info ; which cmake ; cmake --version lsb_release env cat /proc/cpuinfo @@ -74,27 +74,26 @@ jobs: submodules: recursive path: openvino_contrib - - checkout: testdata - clean: true - lfs: true - path: testdata - - script: | - sudo apt --assume-yes install libusb-1.0-0-dev - # For opencv-python: setuptools and upgrade - sudo apt-get install python3-setuptools patchelf + set -e + $(REPO_DIR)/install_build_dependencies.sh + # Move jdk into contrib + sudo apt --assume-yes install openjdk-11-jdk + # For opencv-python: python3-setuptools and pip upgrade python3 -m pip install --upgrade pip python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/wheel/requirements-dev.txt # For running Python API tests python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt - # For running nGraph unit tests dependent on Python frameworks - python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test.txt + # For running PaddlePaddle frontend unit tests + python3 -m pip install -r $(REPO_DIR)/ngraph/test/frontend/paddlepaddle/requirements_dev.txt + # For running ONNX frontend unit tests + python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test_onnx.txt # For MO unit tests python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements.txt python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements_dev.txt # Speed up build - wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip + wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip unzip ninja-linux.zip sudo cp -v ninja /usr/local/bin/ # Speed up tests @@ -102,6 +101,11 @@ jobs: workingDirectory: $(WORK_DIR) displayName: 'Install dependencies' + - checkout: testdata + clean: true + lfs: true + path: testdata + - task: CMake@1 inputs: # CMake must get Python 3.x version by default @@ -110,13 +114,14 @@ jobs: -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON - -DPYTHON_EXECUTABLE=/usr/bin/python3.6 + -DPYTHON_EXECUTABLE=/usr/bin/python3.8 -DENABLE_WHEEL=ON -DENABLE_TESTS=ON -DNGRAPH_ONNX_IMPORT_ENABLE=ON -DNGRAPH_ONNX_FRONTEND_ENABLE=ON -DENABLE_FASTER_BUILD=ON -DENABLE_STRICT_DEPENDENCIES=OFF + -DENABLE_REQUIREMENTS_INSTALL=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR) workingDirectory: $(BUILD_DIR) @@ -139,8 +144,10 @@ jobs: displayName: 'List install files' - script: | + set -e mkdir $(INSTALL_DIR)/opencv/ - cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake && cp 
-R $(REPO_DIR)/inference-engine/temp/opencv_4.5.2_ubuntu18/opencv/* $(INSTALL_DIR)/opencv/ + cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -DCOMPONENT=tests -P cmake_install.cmake + cp -R $(REPO_DIR)/inference-engine/temp/opencv_4.5.2_ubuntu20/opencv/* $(INSTALL_DIR)/opencv/ workingDirectory: $(BUILD_DIR) displayName: 'Install tests' @@ -155,17 +162,31 @@ jobs: workingDirectory: $(BUILD_SAMPLES_DIR) displayName: 'Build c samples' + - script: rm -fr $(BUILD_DIR) + displayName: 'Clean build dir' + continueOnError: false + + # Skip test_onnx/test_zoo_models and test_onnx/test_backend due to long execution time + - script: . $(SETUPVARS) -pyver 3.8 && python3 -m pytest -s $(INSTALL_TEST_DIR)/pyngraph --junitxml=TEST-Pyngraph.xml --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_zoo_models.py --ignore=$(INSTALL_TEST_DIR)/pyngraph/tests/test_onnx/test_backend.py + displayName: 'nGraph Python Bindings Tests' + continueOnError: false + - script: | export MO_ROOT=$(INSTALL_DIR)/deployment_tools/model_optimizer - . $(SETUPVARS) -pyver 3.6 && python3 -m pytest -s $(INSTALL_DIR)/deployment_tools/model_optimizer/unit_tests --junitxml=TEST-ModelOptimizer.xml + . $(SETUPVARS) -pyver 3.8 && python3 -m pytest -s $(INSTALL_DIR)/deployment_tools/model_optimizer/unit_tests --junitxml=TEST-ModelOptimizer.xml displayName: 'Model Optimizer UT' continueOnError: false - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml + workingDirectory: $(INSTALL_TEST_DIR) displayName: 'nGraph UT' continueOnError: false - # . $(SETUPVARS) && python3 $(WORK_DIR)/gtest-parallel/gtest_parallel.py $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --workers=$(WORKERS_NUMBER) --dump_json_test_results=InferenceEngineUnitTests.json --gtest_filter=*smoke* -- --gtest_print_time=1 + - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/paddlepaddle_tests --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-PaddlePaddle.xml + displayName: 'PaddlePaddle Frontend UT' + continueOnError: false + + # . $(SETUPVARS) && python3 $(WORK_DIR)/gtest-parallel/gtest_parallel.py $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --workers=16 --dump_json_test_results=InferenceEngineUnitTests.json --gtest_filter=*smoke* -- --gtest_print_time=1 - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/InferenceEngineUnitTests --gtest_print_time=1 --gtest_output=xml:TEST-InferenceEngineUnitTests.xml displayName: 'IE UT old' continueOnError: false @@ -213,10 +234,9 @@ jobs: export DATA_PATH=$(MODELS_PATH) export MODELS_PATH=$(MODELS_PATH) cd $(REPO_DIR)/inference-engine/ie_bridges/python/tests - . $(SETUPVARS) -pyver 3.6 && pytest pytest --junitxml=TEST-PythonAPI.xml + . 
$(SETUPVARS) -pyver 3.8 && python3 -m pytest --junitxml=TEST-PythonAPI.xml displayName: 'Python API Tests' continueOnError: false - enabled: false - task: PublishTestResults@2 condition: always() diff --git a/.ci/azure/linux_conditional_compilation.yml b/.ci/azure/linux_conditional_compilation.yml index 6d2d33574b7..a4063d2c903 100644 --- a/.ci/azure/linux_conditional_compilation.yml +++ b/.ci/azure/linux_conditional_compilation.yml @@ -4,20 +4,18 @@ jobs: timeoutInMinutes: 90 pool: - name: LIN_VMSS_VENV_F16S_WU2 + name: LIN_VMSS_VENV_F16S_U20_WU2 variables: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 16 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)/../openvino_contrib MODELS_PATH: $(REPO_DIR)/../testdata WORK_DIR: $(Pipeline.Workspace)/_w BUILD_DIR: $(WORK_DIR)/build - BIN_DIR: $(REPO_DIR)/bin/intel64/$(BUILD_TYPE) INSTALL_DIR: $(WORK_DIR)/install_pkg SETUPVARS: $(INSTALL_DIR)/bin/setupvars.sh @@ -30,6 +28,7 @@ jobs: echo Python info ; which python ; python --version echo Java info ; which java ; java -version echo gcc info ; which gcc ; gcc --version + echo cmake info ; which cmake ; cmake --version lsb_release env cat /proc/cpuinfo @@ -53,10 +52,11 @@ jobs: path: openvino - script: | - sudo apt --assume-yes install libusb-1.0-0-dev + set -e + $(REPO_DIR)/install_build_dependencies.sh python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt # Speed up build - wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip + wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip unzip ninja-linux.zip sudo cp -v ninja /usr/local/bin/ workingDirectory: $(WORK_DIR) @@ -76,12 +76,14 @@ jobs: - script: ninja workingDirectory: $(BUILD_DIR) - displayName: 'Build' + displayName: 'Build LinCC' - script: ls -alR $(REPO_DIR)/bin/ - displayName: 'List files' + displayName: 'List bin files' - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake workingDirectory: $(BUILD_DIR) displayName: 'Install' + - script: ls -alR $(INSTALL_DIR) + displayName: 'List install files' diff --git a/.ci/azure/linux_ngraph_onnx.yml b/.ci/azure/linux_ngraph_onnx.yml index 28326c89053..c6071fc127f 100644 --- a/.ci/azure/linux_ngraph_onnx.yml +++ b/.ci/azure/linux_ngraph_onnx.yml @@ -20,13 +20,12 @@ jobs: timeoutInMinutes: 90 pool: - name: LIN_VMSS_VENV_ONNX_WU2 + name: LIN_VMSS_VENV_ONNX_U20_WU2 variables: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 8 REPO_DIR: $(Build.Repository.LocalPath) WORK_DIR: $(Pipeline.Workspace)/_w MODELS_DIR: /mount/cinfsshare/onnxtestdata @@ -43,6 +42,7 @@ jobs: echo Python info ; which python ; python --version echo Java info ; which java ; java -version echo gcc info ; which gcc ; gcc --version + echo cmake info ; which cmake ; cmake --version lsb_release env cat /proc/cpuinfo @@ -68,16 +68,23 @@ jobs: submodules: recursive path: openvino - - script: docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg PROTOBUF_LITE=$(PROTOBUF_LITE) . 
+ - script: | + set -e + sudo apt --assume-yes install git-lfs uidmap + curl -fsSL https://get.docker.com -o get-docker.sh + sudo sh get-docker.sh + workingDirectory: $(WORK_DIR) + displayName: 'Install dependencies' + + - script: sudo docker build --tag=openvino-onnx-ci-image --file=.ci/openvino-onnx/Dockerfile --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg PROTOBUF_LITE=$(PROTOBUF_LITE) . displayName: 'Docker build $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)' - script: ngraph/python/tests/test_onnx/model_zoo_preprocess.sh -d $(TMP_DIR) -o -s "$(ONNX_MODEL_ZOO_SHA)" displayName: 'Get models' condition: ne(variables['BUILD_TYPE'], 'Debug') - - script: sudo fallocate -l 48G /swapfile ; sudo mkswap /swapfile ; sudo swapon /swapfile ; df ; free -h + - script: sudo fallocate -l 64G /swapfile ; sudo mkswap /swapfile ; sudo swapon /swapfile ; df ; free -h displayName: 'Create swap' - - script: | - docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)" + - script: sudo docker run --name openvino-onnx-ci-container --volume $(TMP_DIR)/model_zoo/onnx_model_zoo_$(ONNX_MODEL_ZOO_SHA):/root/.onnx/model_zoo/onnx_model_zoo --volume $(MODELS_DIR)/msft:/root/.onnx/model_zoo/MSFT openvino-onnx-ci-image /bin/bash -c "$(TOX_COMMAND)" displayName: 'Docker run $(BUILD_TYPE) protobuf-lite: $(PROTOBUF_LITE)' diff --git a/.ci/azure/linux_onnxruntime.yml b/.ci/azure/linux_onnxruntime.yml index a2bfee8c70a..0229c37c0b0 100644 --- a/.ci/azure/linux_onnxruntime.yml +++ b/.ci/azure/linux_onnxruntime.yml @@ -3,23 +3,23 @@ jobs: timeoutInMinutes: 90 pool: - name: LIN_VMSS_VENV_ONNX_WU2 + name: LIN_VMSS_VENV_ONNX_U20_WU2 variables: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 8 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) ONNXRUNTIME_REPO_DIR: $(REPO_DIR)/../onnxruntime WORK_DIR: $(Pipeline.Workspace)/_w MODELS_DIR: /mount/cinfsshare/onnxtestdata TMP_DIR: /mnt/tmp - INSTALL_DIR: $(WORK_DIR)/install_pkg + INSTALL_DIR: $(WORK_DIR)/install_pkg/openvino BUILD_DIR: $(WORK_DIR)/build ONNXRUNTIME_UTILS: $(REPO_DIR)/.ci/azure/ci_utils/onnxruntime ONNXRUNTIME_BUILD_DIR: $(ONNXRUNTIME_REPO_DIR)/build + steps: - script: | curl -H Metadata:true --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2019-06-01" @@ -29,6 +29,7 @@ jobs: echo Python info ; which python ; python --version echo Java info ; which java ; java -version echo gcc info ; which gcc ; gcc --version + echo cmake info ; which cmake ; cmake --version lsb_release env cat /proc/cpuinfo @@ -60,15 +61,14 @@ jobs: displayName: 'Clone onnxruntime' - script: | - sudo apt --assume-yes install libusb-1.0-0-dev - # For opencv-python: setuptools and upgrade - sudo apt-get install python3-setuptools + set -e + $(REPO_DIR)/install_build_dependencies.sh python3 -m pip install --upgrade pip python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/requirements.txt # For running Python API tests python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt # Speed up build - wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip + wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip unzip ninja-linux.zip sudo cp -v ninja /usr/local/bin/ # Speed up tests @@ -83,7 +83,7 @@ jobs: -GNinja 
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON - -DPYTHON_EXECUTABLE=/usr/bin/python3.6 + -DPYTHON_EXECUTABLE=/usr/bin/python3.8 -DENABLE_VPU=OFF -DENABLE_GNA=OFF -DENABLE_OPENCV=OFF @@ -102,10 +102,10 @@ jobs: - script: ninja workingDirectory: $(BUILD_DIR) - displayName: 'Build Lin' + displayName: 'Build Lin ONNX' - script: ls -alR $(REPO_DIR)/bin/ - displayName: 'List files' + displayName: 'List bin files' - script: cmake -DCMAKE_INSTALL_PREFIX=$(INSTALL_DIR) -P cmake_install.cmake workingDirectory: $(BUILD_DIR) @@ -113,10 +113,9 @@ jobs: - script: | source $(INSTALL_DIR)/bin/setupvars.sh - echo "2021.2" > $(INSTALL_DIR)/deployment_tools/inference_engine/version.txt CXXFLAGS="-Wno-error=deprecated-declarations" ./build.sh --config RelWithDebInfo --use_openvino CPU_FP32 --build_shared_lib --parallel --skip_tests --build_dir $(ONNXRUNTIME_BUILD_DIR) workingDirectory: $(ONNXRUNTIME_REPO_DIR) - displayName: 'Build ONNX Runtime' + displayName: 'Build Lin ONNX Runtime' - script: | source $(INSTALL_DIR)/bin/setupvars.sh diff --git a/.ci/azure/mac.yml b/.ci/azure/mac.yml index 90fc812bbaa..b07ff48f78c 100644 --- a/.ci/azure/mac.yml +++ b/.ci/azure/mac.yml @@ -22,7 +22,6 @@ jobs: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 3 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)/../openvino_contrib @@ -76,6 +75,7 @@ jobs: - script: | brew install cython brew install automake + python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test_onnx.txt # Speed up build brew install ninja # Speed up tests @@ -87,7 +87,7 @@ jobs: export PATH="/usr/local/opt/cython/bin:$PATH" export CC=gcc export CXX=g++ - cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR) + cmake -GNinja -DVERBOSE_BUILD=ON -DENABLE_REQUIREMENTS_INSTALL=OFF -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR) workingDirectory: $(BUILD_DIR) displayName: 'CMake' @@ -103,6 +103,7 @@ jobs: displayName: 'Install' - script: $(BIN_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU*:IE_CPU.onnx_model_sigmoid:IE_CPU/GRUSequenceOp.onnx_model_gru* --gtest_output=xml:TEST-NGraphUT.xml + workingDirectory: $(BIN_DIR) displayName: 'nGraph UT' continueOnError: false diff --git a/.ci/azure/windows.yml b/.ci/azure/windows.yml index c368776c8f4..e5ec0486f9b 100644 --- a/.ci/azure/windows.yml +++ b/.ci/azure/windows.yml @@ -22,7 +22,6 @@ jobs: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 - WORKERS_NUMBER: 8 BUILD_TYPE: Release REPO_DIR: $(Build.Repository.LocalPath) OPENVINO_CONTRIB_REPO_DIR: $(REPO_DIR)\..\openvino_contrib @@ -42,6 +41,7 @@ jobs: - script: | powershell -command "Invoke-RestMethod -Headers @{\"Metadata\"=\"true\"} -Method GET -Uri http://169.254.169.254/metadata/instance/compute?api-version=2019-06-01 | format-custom" where python3 + python3 --version where python python --version where java @@ -83,7 +83,18 @@ jobs: path: testdata - script: | - certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-win.zip ninja-win.zip + python -m pip install --upgrade pip + rem For running Python API tests + python -m pip install -r $(REPO_DIR)\inference-engine\ie_bridges\python\src\requirements-dev.txt + 
rem For running PaddlePaddle frontend unit tests + python -m pip install -r $(REPO_DIR)\ngraph\test\frontend\paddlepaddle\requirements_dev.txt + rem For running ONNX frontend unit tests + python -m pip install -r $(REPO_DIR)\ngraph\test\requirements_test_onnx.txt + rem For MO unit tests + python -m pip install -r $(REPO_DIR)\model-optimizer\requirements.txt + python -m pip install -r $(REPO_DIR)\model-optimizer\requirements_dev.txt + rem Speed up build + certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip powershell -command "Expand-Archive -Force ninja-win.zip" git clone https://github.com/google/gtest-parallel.git workingDirectory: $(WORK_DIR) @@ -91,7 +102,7 @@ jobs: - script: | set PATH=$(WORK_DIR)\ninja-win;%PATH% - call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DENABLE_TEMPLATE_PLUGIN=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR) + call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_REQUIREMENTS_INSTALL=OFF -DENABLE_FASTER_BUILD=ON -DENABLE_TEMPLATE_PLUGIN=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.7.6\x64\python.exe" -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR) workingDirectory: $(BUILD_DIR) displayName: 'CMake' @@ -129,10 +140,19 @@ jobs: workingDirectory: $(BUILD_SAMPLES_DIR) displayName: 'Build c samples' + - script: rd /Q /S $(BUILD_DIR) + displayName: 'Clean build dir' + continueOnError: false + - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml + workingDirectory: $(INSTALL_TEST_DIR) displayName: 'nGraph UT' continueOnError: false + - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\paddlepaddle_tests --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-PaddlePaddle.xml + displayName: 'PaddlePaddle Frontend UT' + continueOnError: false + - script: | set PATH=$(IB_DIR);%PATH% call $(SETUPVARS) && "$(IB_TESTCONSOLE)" $(INSTALL_TEST_DIR)\InferenceEngineUnitTests.exe --gtest_output=xml:TEST-InferenceEngineUnitTests-IB.xml diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index 7969cf13aa1..f452feb67d7 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -28,7 +28,7 @@ jobs: cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT .. - name: Check code style - run: cmake --build build --target clang_format_check_all + run: cmake --build build --target clang_format_check_all -j8 - name: Create code style diff if: failure() @@ -64,5 +64,29 @@ jobs: cmake .. - name: ShellCheck - run: make ie_shellcheck + run: cmake --build . 
--target ie_shellcheck -j8 + working-directory: build + + NamingConventionCheck: + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + + - name: Install Clang dependency + run: sudo apt --assume-yes install libusb-1.0-0-dev libclang-9-dev + + - name: Install Python-based dependencies + run: | + python3 -m pip install pyyaml clang==9.0 + + - name: CMake + run: | + mkdir build + cd build + cmake .. + + - name: Naming convention check + run: cmake --build . --target ncc_all -j8 working-directory: build diff --git a/.gitmodules b/.gitmodules index d3f72b54c4f..0b76a4b239e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -40,7 +40,7 @@ ignore = dirty [submodule "thirdparty/onnx"] path = thirdparty/onnx/onnx - url = https://github.com/openvinotoolkit/onnx.git + url = https://github.com/onnx/onnx.git [submodule "thirdparty/protobuf"] path = thirdparty/protobuf/protobuf url = https://github.com/protocolbuffers/protobuf.git @@ -50,3 +50,6 @@ [submodule "thirdparty/ittapi/ittapi"] path = thirdparty/ittapi/ittapi url = https://github.com/intel/ittapi.git +[submodule "ncc"] + path = cmake/developer_package/ncc_naming_style/ncc + url = https://github.com/nithinn/ncc.git diff --git a/CMakeLists.txt b/CMakeLists.txt index cfbb8e5ea45..61a96ae9f4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,6 +77,9 @@ endfunction() ie_cpack_add_component(ngraph REQUIRED) ie_cpack_add_component(ngraph_dev REQUIRED DEPENDS ngraph) +# add target with processed tests model zoo +include(cmake/test_model_zoo.cmake) + add_subdirectory(thirdparty) add_subdirectory(openvino) add_subdirectory(ngraph) diff --git a/CODEOWNERS b/CODEOWNERS index d47170c4716..2894fac8ff3 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -30,13 +30,13 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins # IE GPU: /inference-engine/src/cldnn_engine/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers -/inference-engine/include/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers -/inference-engine/include/cldnn/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers +/inference-engine/src/inference_engine/include/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers +/inference-engine/src/inference_engine/include/cldnn/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers /inference-engine/thirdparty/clDNN/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers # IE VPU: /inference-engine/src/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers -/inference-engine/include/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers +/inference-engine/src/inference_engine/include/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers /inference-engine/thirdparty/movidius/ @openvinotoolkit/openvino-ie-vpu-maintainers /inference-engine/tests_deprecated/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers /inference-engine/tests_deprecated/functional/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers @@ -49,11 +49,11 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins # IE GNA: /inference-engine/src/gna_plugin/ @openvinotoolkit/openvino-ie-gna-maintainers -/inference-engine/include/gna/ @openvinotoolkit/openvino-ie-gna-maintainers +/inference-engine/src/inference_engine/include/gna/ 
@openvinotoolkit/openvino-ie-gna-maintainers # IE MULTI: /inference-engine/src/multi_device/ @openvinotoolkit/openvino-ie-multi-maintainers -/inference-engine/include/multi-device/ @openvinotoolkit/openvino-ie-multi-maintainers +/inference-engine/src/inference_engine/include/multi-device/ @openvinotoolkit/openvino-ie-multi-maintainers # IE Tests: /inference-engine/tests/ @openvinotoolkit/openvino-ie-tests-maintainers @@ -77,4 +77,4 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins # Control 3d party dependencies *requirements* @openvino-configuration-mgmt *setup.py @openvino-configuration-mgmt -/scripts/install_dependencies/ @openvino-configuration-mgmt \ No newline at end of file +/scripts/install_dependencies/ @openvino-configuration-mgmt diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index e84a7cdc718..82f98b4c515 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -7,10 +7,6 @@ cmake_policy(SET CMP0054 NEW) # TODO: fix it set_temp_directory(TEMP "${IE_MAIN_SOURCE_DIR}") -if(CMAKE_CROSSCOMPILING) - set(CMAKE_STAGING_PREFIX "${TEMP}") -endif() - if(ENABLE_SAME_BRANCH_FOR_MODELS) branchName(MODELS_BRANCH) else() @@ -315,25 +311,25 @@ if(ENABLE_SPEECH_DEMO) if(DEFINED IE_PATH_TO_DEPS) if(WIN32 AND X86_64) RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_WIN "speech_demo_1.0.0.774_windows.zip" + ARCHIVE_WIN "speech_demo_1.0.0.780_windows.zip" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" - SHA256 "67b25170be5e89a4f0e90e8b39623b60c9a15b965c30329385e295fcd2edc856") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.780" + SHA256 "957bd274a1f6dc1d83a46879c7ef3b3b06f17d11af85cc45c18919051d145abd") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) elseif(LINUX AND X86_64) if(LINUX_OS_NAME STREQUAL "CentOS 7" OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_LIN "speech_demo_1.0.0.774_centos.tgz" + ARCHIVE_LIN "speech_demo_1.0.0.780_centos.tgz" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" - SHA256 "5ec3b7be9ae05376aefae5bd5fd4a39b12c274e82817fd3218120b8e8fc8ff5a") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.780" + SHA256 "6d8d1111d0e662fe71d71cd3debad2995f6fb6fe5df3b92196dae06ff7abdf44") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) else() RESOLVE_DEPENDENCY(SPEECH_LIBS_AND_DEMOS - ARCHIVE_LIN "speech_demo_1.0.0.774_linux.tgz" + ARCHIVE_LIN "speech_demo_1.0.0.780_linux.tgz" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+.[0-9]+).*" - TARGET_PATH "${TEMP}/speech_demo_1.0.0.774" - SHA256 "f0bbd0a6218b0365e7cfb1f860b34e4ace7e0d47dd60b369cdea8a480329810f") + TARGET_PATH "${TEMP}/speech_demo_1.0.0.780" + SHA256 "0ec6f1e47c00d781dc918af5d3055ab474ff47b9978dd6fe2add73e3339b0763") debug_message(STATUS "speech_libs_and_demos=" ${SPEECH_LIBS_AND_DEMOS}) endif() else() diff --git a/cmake/developer_package/IEDevScriptsConfig.cmake b/cmake/developer_package/IEDevScriptsConfig.cmake index 46423aa61c7..febe20339bb 100644 --- a/cmake/developer_package/IEDevScriptsConfig.cmake +++ b/cmake/developer_package/IEDevScriptsConfig.cmake @@ -132,7 +132,7 @@ set(IE_DEBUG_POSTFIX_WIN "d") set(IE_RELEASE_POSTFIX_WIN "") set(IE_DEBUG_POSTFIX_LIN "") set(IE_RELEASE_POSTFIX_LIN "") -set(IE_DEBUG_POSTFIX_MAC "d") +set(IE_DEBUG_POSTFIX_MAC "") set(IE_RELEASE_POSTFIX_MAC "") if(WIN32) @@ -187,8 +187,8 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON) # Enable CMAKE__COMPILER_ID AppleClang 
set(CMAKE_POLICY_DEFAULT_CMP0025 NEW) -set(CMAKE_WARN_DEPRECATED OFF) -set(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION ON) +set(CMAKE_WARN_DEPRECATED OFF CACHE BOOL "Don't warn about obsolete cmake versions in 3rdparty") +set(CMAKE_WARN_ON_ABSOLUTE_INSTALL_DESTINATION ON CACHE BOOL "Warn about absolute paths in destination") # LTO @@ -251,20 +251,40 @@ endfunction() # check python package -function(ie_check_pip_package name message_type) +function(ie_check_pip_package full_name message_type) find_package(PythonInterp 3 REQUIRED) + get_filename_component(PYTHON_EXEC_DIR ${PYTHON_EXECUTABLE} DIRECTORY) + + # extract version if any + if(full_name MATCHES "^([a-z_]+)[~=<>!]*(.*)$") + set(name ${CMAKE_MATCH_1}) + set(req_version ${CMAKE_MATCH_2}) + else() + set(name ${full_name}) + endif() + execute_process( COMMAND ${PYTHON_EXECUTABLE} -m pip show ${name} + WORKING_DIRECTORY ${PYTHON_EXEC_DIR} RESULT_VARIABLE PIP_EXIT_CODE - OUTPUT_QUIET - ) + OUTPUT_VARIABLE output) if(NOT PIP_EXIT_CODE EQUAL 0) set(${name}_FOUND OFF PARENT_SCOPE) - message(${message_type} "${name} package is not installed. Please use \"${PYTHON_EXECUTABLE} -m pip install ${name}\".") + message(${message_type} "${name} package is not installed. Please use \"${PYTHON_EXECUTABLE} -m pip install ${full_name}\".") else() - set(${name}_FOUND ON PARENT_SCOPE) + if(req_version) + string(REGEX MATCH "Version: ([0-9]+\.?[0-9]*\.?[0-9]*)\n" installed_version "${output}") + if(installed_version) + set(installed_version "${CMAKE_MATCH_1}") + endif() + + message(${message_type} "${name} package is installed, but may have different version (${installed_version}). " + "Please use \"${PYTHON_EXECUTABLE} -m pip install ${full_name}\".") + else() + set(${name}_FOUND ON PARENT_SCOPE) + endif() endif() endfunction() @@ -272,6 +292,7 @@ endfunction() include(cpplint/cpplint) include(clang_format/clang_format) +include(ncc_naming_style/ncc_naming_style) # Restore state set(CMAKE_MODULE_PATH ${OLD_CMAKE_MODULE_PATH}) diff --git a/cmake/developer_package/clang_format/clang_format.cmake b/cmake/developer_package/clang_format/clang_format.cmake index 7a1487ea705..a94f1891466 100644 --- a/cmake/developer_package/clang_format/clang_format.cmake +++ b/cmake/developer_package/clang_format/clang_format.cmake @@ -2,17 +2,17 @@ # SPDX-License-Identifier: Apache-2.0 # -if (ENABLE_CLANG_FORMAT) +if(ENABLE_CLANG_FORMAT) set(CLANG_FORMAT_FILENAME clang-format-9 clang-format) - find_program(CLANG_FORMAT NAMES ${CLANG_FORMAT_FILENAME} PATHS ENV PATH) - if (CLANG_FORMAT) + find_host_program(CLANG_FORMAT NAMES ${CLANG_FORMAT_FILENAME} PATHS ENV PATH) + if(CLANG_FORMAT) execute_process(COMMAND ${CLANG_FORMAT} ${CMAKE_CURRENT_SOURCE_DIR} ARGS --version OUTPUT_VARIABLE CLANG_VERSION) - if (NOT CLANG_VERSION OR CLANG_VERSION STREQUAL "") + if(NOT CLANG_VERSION OR CLANG_VERSION STREQUAL "") message(WARNING "Supported clang-format version is 9!") set(ENABLE_CLANG_FORMAT OFF) else() string(REGEX REPLACE "[^0-9]+([0-9]+)\\..*" "\\1" CLANG_FORMAT_MAJOR_VERSION ${CLANG_VERSION}) - if (NOT ${CLANG_FORMAT_MAJOR_VERSION} EQUAL "9") + if(NOT ${CLANG_FORMAT_MAJOR_VERSION} EQUAL "9") message(WARNING "Supported clang-format version is 9!") set(ENABLE_CLANG_FORMAT OFF) endif() diff --git a/cmake/developer_package/compile_flags/os_flags.cmake b/cmake/developer_package/compile_flags/os_flags.cmake index 868c3455d5d..d2c51130a95 100644 --- a/cmake/developer_package/compile_flags/os_flags.cmake +++ b/cmake/developer_package/compile_flags/os_flags.cmake @@ -68,13 +68,13 @@ 
function(ie_sse42_optimization_flags flags) if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # No such option for MSVC 2019 elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - set(${flags} /arch:SSE4.2 /QxSSE4.2 PARENT_SCOPE) + set(${flags} /QxSSE4.2 PARENT_SCOPE) else() message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}") endif() else() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - set(${flags} -msse4.2 -xSSE4.2 PARENT_SCOPE) + set(${flags} -xSSE4.2 PARENT_SCOPE) else() set(${flags} -msse4.2 PARENT_SCOPE) endif() @@ -95,7 +95,7 @@ function(ie_avx2_optimization_flags flags) endif() else() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - set(${flags} -march=core-avx2 -xCORE-AVX2 -mtune=core-avx2 PARENT_SCOPE) + set(${flags} -xCORE-AVX2 PARENT_SCOPE) else() set(${flags} -mavx2 -mfma PARENT_SCOPE) endif() @@ -152,6 +152,24 @@ function(ie_arm_neon_optimization_flags flags) endif() endfunction() +# +# Disables all warnings for 3rd party targets +# +function(ov_disable_all_warnings) + foreach(target IN LISTS ARGN) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + target_compile_options(${target} PRIVATE /WX-) + elseif(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG) + target_compile_options(${target} PRIVATE -w) + elseif(UNIX AND CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + # 193: zero used for undefined preprocessing identifier "XXX" + # 1011: missing return statement at end of non-void function "XXX" + # 2415: variable "xxx" of static storage duration was declared but never referenced + target_compile_options(${target} PRIVATE -diag-disable=warn,193,1011,2415) + endif() + endforeach() +endfunction() + # # Enables Link Time Optimization compilation # @@ -286,15 +304,13 @@ else() ie_add_compiler_flags(-Wreturn-type) ie_add_compiler_flags(-Wunused-variable) - # Disable noisy warnings - if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") ie_add_compiler_flags(-Wswitch) elseif(UNIX) ie_add_compiler_flags(-Wuninitialized -Winit-self) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - ie_add_compiler_flags(-Wno-error=switch - -Winconsistent-missing-override) + ie_add_compiler_flags(-Winconsistent-missing-override + -Wstring-plus-int) else() ie_add_compiler_flags(-Wmaybe-uninitialized) check_cxx_compiler_flag("-Wsuggest-override" SUGGEST_OVERRIDE_SUPPORTED) @@ -304,10 +320,11 @@ else() endif() endif() + # Disable noisy warnings + if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - ie_add_compiler_flags(-diag-disable=remark) - # noisy warnings from Intel Compiler 19.1.1.217 20200306 - ie_add_compiler_flags(-diag-disable=2196) + # 177: function "XXX" was declared but never referenced + ie_add_compiler_flags(-diag-disable=remark,177,2196) endif() # Linker flags @@ -315,7 +332,6 @@ else() if(APPLE) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-dead_strip") set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-dead_strip") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-dead_strip") elseif(LINUX) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gc-sections -Wl,--exclude-libs,ALL") set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,--gc-sections -Wl,--exclude-libs,ALL") diff --git a/cmake/developer_package/compile_flags/sanitizer.cmake b/cmake/developer_package/compile_flags/sanitizer.cmake index 35343b129f3..298f4243799 100644 --- a/cmake/developer_package/compile_flags/sanitizer.cmake +++ b/cmake/developer_package/compile_flags/sanitizer.cmake @@ -18,6 +18,8 @@ if (ENABLE_UB_SANITIZER) # TODO: Remove -fno-sanitize=null as thirdparty/ocl/clhpp_headers 
UBSAN compatibility resolved: # https://github.com/KhronosGroup/OpenCL-CLHPP/issues/17 set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize=undefined -fno-sanitize=null") + # TODO: Remove -Wno-maybe-uninitialized after CVS-61143 fix + set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -Wno-maybe-uninitialized") check_cxx_compiler_flag("-fsanitize-recover=undefined" SANITIZE_RECOVER_UNDEFINED_SUPPORTED) if (SANITIZE_RECOVER_UNDEFINED_SUPPORTED) set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=undefined") @@ -33,17 +35,18 @@ endif() # common sanitizer options if (DEFINED SANITIZER_COMPILER_FLAGS) - # ensure sumbols are present + # ensure symbols are present set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -g -fno-omit-frame-pointer") + if(NOT OV_COMPILER_IS_CLANG) + # GPU plugin tests compilation is slow with -fvar-tracking-assignments on GCC. + # Clang has no var-tracking-assignments. + set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fno-var-tracking-assignments") + endif() # prevent unloading libraries at runtime, so sanitizer can resolve their symbols set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete") - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=gold") - elseif(OV_COMPILER_IS_CLANG AND NOT WIN32) - if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0) - set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld") - endif() + if(OV_COMPILER_IS_CLANG AND NOT WIN32 AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0) + set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld") endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SANITIZER_COMPILER_FLAGS}") diff --git a/cmake/developer_package/features.cmake b/cmake/developer_package/features.cmake index 33e3530bac3..a0d4d17db32 100644 --- a/cmake/developer_package/features.cmake +++ b/cmake/developer_package/features.cmake @@ -44,16 +44,14 @@ ie_option (BUILD_SHARED_LIBS "Build as a shared library" ON) ie_dependent_option (ENABLE_FASTER_BUILD "Enable build features (PCH, UNITY) to speed up build time" OFF "CMAKE_VERSION VERSION_GREATER_EQUAL 3.16" OFF) -if(NOT DEFINED ENABLE_CPPLINT) - ie_dependent_option (ENABLE_CPPLINT "Enable cpplint checks during the build" ON "UNIX;NOT ANDROID" OFF) -endif() +ie_dependent_option (ENABLE_CPPLINT "Enable cpplint checks during the build" ON "UNIX;NOT ANDROID" OFF) -if(NOT DEFINED ENABLE_CPPLINT_REPORT) - ie_dependent_option (ENABLE_CPPLINT_REPORT "Build cpplint report instead of failing the build" OFF "ENABLE_CPPLINT" OFF) -endif() +ie_dependent_option (ENABLE_CPPLINT_REPORT "Build cpplint report instead of failing the build" OFF "ENABLE_CPPLINT" OFF) ie_dependent_option (ENABLE_CLANG_FORMAT "Enable clang-format checks during the build" ON "UNIX;NOT ANDROID" OFF) +ie_dependent_option (ENABLE_NCC_STYLE "Enable ncc style check" ON "UNIX;NOT ANDROID" OFF) + ie_option (VERBOSE_BUILD "shows extra information about build" OFF) ie_option (ENABLE_UNSAFE_LOCATIONS "skip check for MD5 for dependency" OFF) diff --git a/cmake/developer_package/ncc_naming_style/ncc b/cmake/developer_package/ncc_naming_style/ncc new file mode 160000 index 00000000000..d7d83049708 --- /dev/null +++ b/cmake/developer_package/ncc_naming_style/ncc @@ -0,0 +1 @@ +Subproject commit d7d83049708eaa18ea6796adf0eeef85b28ebc1f diff --git a/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake b/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake new file 
mode 100644
index 00000000000..60b03e2f726
--- /dev/null
+++ b/cmake/developer_package/ncc_naming_style/ncc_naming_style.cmake
@@ -0,0 +1,137 @@
+# Copyright (C) 2018-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+if(NOT COMMAND ie_check_pip_package)
+    message(FATAL_ERROR "ncc_naming_style.cmake must be included after ie_check_pip_package")
+endif()
+
+set(ncc_style_dir "${IEDevScripts_DIR}/ncc_naming_style")
+set(ncc_style_bin_dir "${CMAKE_CURRENT_BINARY_DIR}/ncc_naming_style")
+
+# try to find_package(Clang QUIET)
+# ClangConfig.cmake contains bug that if libclang-XX-dev is not
+# installed, then find_package fails with errors even in QUIET mode
+configure_file("${ncc_style_dir}/try_find_clang.cmake"
+               "${ncc_style_bin_dir}/source/CMakeLists.txt" COPYONLY)
+execute_process(
+    COMMAND
+        "${CMAKE_COMMAND}" -S "${ncc_style_bin_dir}/source"
+                           -B "${ncc_style_bin_dir}/build"
+    RESULT_VARIABLE clang_find_result
+    OUTPUT_VARIABLE output
+    ERROR_VARIABLE output)
+
+if(NOT clang_find_result EQUAL "0")
+    message(WARNING "Please, install libclang-[N]-dev package (required for ncc naming style check)")
+    set(ENABLE_NCC_STYLE OFF)
+endif()
+
+# Since we were able to find_package(Clang) in a separate process
+# let's try to find in current process
+if(ENABLE_NCC_STYLE)
+    find_host_package(Clang QUIET)
+    if(Clang_FOUND AND TARGET libclang)
+        get_target_property(libclang_location libclang LOCATION)
+        set(ncc_wrapper_py "${ncc_style_bin_dir}/ncc_wrapper.py")
+        configure_file("${ncc_style_dir}/ncc_wrapper.py.in" ${ncc_wrapper_py} @ONLY)
+        message(STATUS "Found libclang: ${libclang_location}")
+    else()
+        message(WARNING "libclang is not found (required for ncc naming style check)")
+        set(ENABLE_NCC_STYLE OFF)
+    endif()
+endif()
+
+# find python3
+
+find_package(PythonInterp 3 QUIET)
+if(NOT PYTHONINTERP_FOUND)
+    message(WARNING "Python3 interpreter was not found (required for ncc naming style check)")
+    set(ENABLE_NCC_STYLE OFF)
+endif()
+
+# check python requirements_dev.txt
+
+set(req_file "${ncc_style_dir}/requirements_dev.txt")
+file(STRINGS ${req_file} req_lines)
+
+foreach(req IN LISTS req_lines)
+    ie_check_pip_package(${req} STATUS)
+endforeach()
+
+set(ncc_script_dir "${ncc_style_dir}/ncc/")
+set(ncc_script_py "${ncc_style_dir}/ncc/ncc.py")
+
+if(NOT EXISTS ${ncc_script_py})
+    message(WARNING "ncc.py is not downloaded via submodule")
+    set(ENABLE_NCC_STYLE OFF)
+endif()
+
+# create high-level target
+
+if(ENABLE_NCC_STYLE AND NOT TARGET ncc_all)
+    add_custom_target(ncc_all ALL)
+    set_target_properties(ncc_all PROPERTIES FOLDER ncc_naming_style)
+endif()
+
+#
+# ov_ncc_naming_style(FOR_TARGET target_name
+#                     INCLUDE_DIRECTORY dir
+#                     [ADDITIONAL_INCLUDE_DIRECTORIES dir1 dir2 ..])
+#
+# FOR_TARGET - name of the target
+# INCLUDE_DIRECTORY - directory to check headers from
+# ADDITIONAL_INCLUDE_DIRECTORIES - additional include directories used in checked headers
+#
+function(ov_ncc_naming_style)
+    if(NOT ENABLE_NCC_STYLE)
+        return()
+    endif()
+
+    cmake_parse_arguments(NCC_STYLE ""
+        "FOR_TARGET;INCLUDE_DIRECTORY" "ADDITIONAL_INCLUDE_DIRECTORIES" ${ARGN})
+
+    file(GLOB_RECURSE headers
+        RELATIVE "${NCC_STYLE_INCLUDE_DIRECTORY}"
+        "${NCC_STYLE_INCLUDE_DIRECTORY}/*.hpp")
+
+    set(new_pythonpath "${ncc_script_dir}:$ENV{PYTHONPATH}")
+    list(APPEND ADDITIONAL_INCLUDE_DIRECTORIES "${NCC_STYLE_INCLUDE_DIRECTORY}")
+
+    foreach(header IN LISTS headers)
+        set(output_file "${ncc_style_bin_dir}/${header}.ncc_style")
+        set(full_header_path "${NCC_STYLE_INCLUDE_DIRECTORY}/${header}")
+
+        add_custom_command(
+            OUTPUT
+                ${output_file}
+            COMMAND
+                "${CMAKE_COMMAND}" -E env PYTHONPATH=${new_pythonpath}
+                "${CMAKE_COMMAND}"
+                    -D "PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}"
+                    -D "NCC_PY_SCRIPT=${ncc_wrapper_py}"
+                    -D "INPUT_FILE=${full_header_path}"
+                    -D "OUTPUT_FILE=${output_file}"
+                    -D "STYLE_FILE=${ncc_style_dir}/openvino.style"
+                    -D "ADDITIONAL_INCLUDE_DIRECTORIES=${ADDITIONAL_INCLUDE_DIRECTORIES}"
+                    -P "${ncc_style_dir}/ncc_run.cmake"
+            DEPENDS
+                "${full_header_path}"
+                "${ncc_style_dir}/openvino.style"
+                "${ncc_script_py}"
+                "${ncc_wrapper_py}"
+                "${ncc_style_dir}/ncc_run.cmake"
+            COMMENT
+                "[ncc naming style] ${header}"
+            VERBATIM)
+        list(APPEND output_files ${output_file})
+    endforeach()
+
+    set(ncc_target ${NCC_STYLE_FOR_TARGET}_ncc_check)
+    add_custom_target(${ncc_target}
+        DEPENDS ${output_files}
+        COMMENT "[ncc naming style] ${NCC_STYLE_FOR_TARGET}")
+
+    add_dependencies(${NCC_STYLE_FOR_TARGET} ${ncc_target})
+    add_dependencies(ncc_all ${ncc_target})
+endfunction()
diff --git a/cmake/developer_package/ncc_naming_style/ncc_run.cmake b/cmake/developer_package/ncc_naming_style/ncc_run.cmake
new file mode 100644
index 00000000000..9d161b9c373
--- /dev/null
+++ b/cmake/developer_package/ncc_naming_style/ncc_run.cmake
@@ -0,0 +1,31 @@
+# Copyright (C) 2018-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+foreach(var NCC_PY_SCRIPT PYTHON_EXECUTABLE OUTPUT_FILE
+            INPUT_FILE ADDITIONAL_INCLUDE_DIRECTORIES STYLE_FILE)
+    if(NOT DEFINED ${var})
+        message(FATAL_ERROR "${var} is not defined for ncc_run.cmake")
+    endif()
+endforeach()
+
+file(REMOVE "${OUTPUT_FILE}")
+
+execute_process(
+    COMMAND
+        "${PYTHON_EXECUTABLE}"
+        "${NCC_PY_SCRIPT}"
+        --path ${INPUT_FILE}
+        --style ${STYLE_FILE}
+        --include ${ADDITIONAL_INCLUDE_DIRECTORIES}
+    RESULT_VARIABLE result
+    OUTPUT_VARIABLE output
+    ERROR_VARIABLE output)
+
+file(WRITE "${OUTPUT_FILE}" "${output}")
+
+if(NOT result EQUAL "0")
+    # Display the output to console (to parse it from IDE)
+    message("${output}")
+    message(FATAL_ERROR "[ncc naming style] Naming style check failed for ${INPUT_FILE}")
+endif()
diff --git a/cmake/developer_package/ncc_naming_style/ncc_wrapper.py.in b/cmake/developer_package/ncc_naming_style/ncc_wrapper.py.in
new file mode 100644
index 00000000000..ed70e960b50
--- /dev/null
+++ b/cmake/developer_package/ncc_naming_style/ncc_wrapper.py.in
@@ -0,0 +1,52 @@
+#!/usr/bin/python3
+
+# Copyright (C) 2018-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import logging
+import os
+import sys
+
+from clang.cindex import Config
+from ncc import Options, RulesDb, do_validate, Validator
+
+if __name__ == "__main__":
+    # set path to specific clang library location
+    Config.set_library_file('@libclang_location@')
+
+    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s',
+                        filename='log.txt', filemode='w')
+
+    """ Parse all command line arguments and validate """
+    op = Options()
+    op.parse_cmd_line()
+
+    if op.args.path is None:
+        sys.exit(1)
+
+    """ Creating the rules database """
+    rules_db = RulesDb(op._style_file)
+
+    """ Check the source code against the configured rules """
+    errors = 0
+    for path in op.args.path:
+        if os.path.isfile(path):
+            if do_validate(op, path):
+                v = Validator(rules_db, path, op)
+                errors += v.validate()
+        elif os.path.isdir(path):
+            for (root, subdirs, files) in os.walk(path):
+                for filename in files:
+                    path = root + '/' + filename
+                    if do_validate(op, path):
+                        v = Validator(rules_db, path, op)
+                        errors += v.validate()
+
+                if
not op.args.recurse: + break + else: + sys.stderr.write("File '{}' not found!\n".format(path)) + + if errors: + print("Total number of errors = {}".format(errors)) + sys.exit(1) diff --git a/cmake/developer_package/ncc_naming_style/openvino.style b/cmake/developer_package/ncc_naming_style/openvino.style new file mode 100644 index 00000000000..c44fc5c5e4a --- /dev/null +++ b/cmake/developer_package/ncc_naming_style/openvino.style @@ -0,0 +1,129 @@ +# custom OpenVINO values +CppMethod: '^(operator\W+|[a-z_\d]+|signaling_NaN|quiet_NaN)$' +# TODO: remove stopwatch|unsupported_op +ClassName: '^([A-Z][\w]+|b?float16|numeric_limits|ngraph_error|stopwatch|unsupported_op)$' +# TODO: remove oi_pair +StructName: '^([A-Z][\w]+|element_type_traits|hash|oi_pair)$' +FunctionName: '^(operator\W+|[a-z_\d]+)$' +Namespace: '^[a-z\d_]+$' +NamespaceAlias: '^[a-z\d_]+$' +UnionName: '[A-Z][\w]+$' +TemplateTemplateParameter: '[A-Z][\w]+' +NamespaceReference: '^[a-z\d_]+$' +TemplateNonTypeParameter: '^\w*$' +ClassTemplate: '^([A-Z][\w]+|element_type_traits)$' +TemplateTypeParameter: '^\w*$' +ParameterName: '^\w*$' +FunctionTemplate: '^(operator.+|\w+)$' +TypeAliasName: '^\w+$' +VariableReference: '^\w+$' + +# TODO: align +EnumConstantName: '^.*$' +EnumName: '^.*$' +UsingDeclaration: '^.*$' +TypedefName: '^.*$' + +# not needed values +ClassTemplatePartialSpecialization: 'XXXX' +ConversionFunction: '^.*$' +UsingDirective: 'XXXX' +ClassAccessSpecifier: '^.*$' # looks like can be fixed +TypeReference: '^.*$' # looks like can be fixed +CxxBaseSpecifier: '^.*$' # looks like can be fixed +TemplateReference: '^.*$' +MemberReference: '^.*$' +LabelReference: 'XXXX' +OverloadedDeclarationReference: '^.*$' +InvalidFile: 'XXXX' +NoDeclarationFound: 'XXXX' +NotImplemented: 'XXXX' +InvalidCode: 'XXXX' +UnexposedExpression: '^.*$' +DeclarationReferenceExpression: '^.*$' +MemberReferenceExpression: '^.*$' +CallExpression: '^.*$' +BlockExpression: 'XXXX' +IntegerLiteral: '^.*$' +FloatingLiteral: '^.*$' +ImaginaryLiteral: 'XXXX' +StringLiteral: '^.*$' +CharacterLiteral: '^.*$' +ParenExpression: '^.*$' +UnaryOperator: '^.*$' +ArraySubscriptExpression: '^.*$' +BinaryOperator: '^.*$' +CompoundAssignmentOperator: '^.*$' +ConditionalOperator: '^.*$' +CstyleCastExpression: '^.*$' +CompoundLiteralExpression: 'XXXX' +InitListExpression: '^.*$' +AddrLabelExpression: 'XXXX' +StatementExpression: 'XXXX' +GenericSelectionExpression: 'XXXX' +GnuNullExpression: 'XXXX' +CxxStaticCastExpression: '^.*$' +CxxDynamicCastExpression: 'XXXX' +CxxReinterpretCastExpression: '^.*$' +CxxConstCastExpression: 'XXXX' +CxxFunctionalCastExpression: '^.*$' +CxxTypeidExpression: 'XXXX' +CxxBoolLiteralExpression: '^.*$' +CxxNullPointerLiteralExpression: '^.*$' +CxxThisExpression: '^.*$' +CxxThrowExpression: '^.*$' +CxxNewExpression: '^.*$' +CxxDeleteExpression: 'XXXX' +CxxUnaryExpression: '^.*$' +PackExpansionExpression: '^.*$' +SizeOfPackExpression: '^.*$' +LambdaExpression: '^.*$' +ObjectBoolLiteralExpression: 'XXXX' +ObjectSelfExpression: 'XXXX' +UnexposedStatement: 'XXXX' +LabelStatement: 'XXXX' +CompoundStatement: '^.*$' +CaseStatement: '^.*$' +DefaultStatement: '^.*$' +IfStatement: '^.*$' +SwitchStatement: '^.*$' +WhileStatement: '^.*$' +DoStatement: '^.*$' +ForStatement: '^.*$' +GotoStatement: 'XXXX' +IndirectGotoStatement: 'XXXX' +ContinueStatement: '^.*$' +BreakStatement: '^.*$' +ReturnStatement: '^.*$' +AsmStatement: 'XXXX' +CxxCatchStatement: 'XXXX' +CxxTryStatement: 'XXXX' +CxxForRangeStatement: '^.*$' +MsAsmStatement: 'XXXX' +NullStatement: 'XXXX' 
+DeclarationStatement: '^.*$' +TranslationUnit: 'XXXX' +UnexposedAttribute: '^.*$' +CxxFinalAttribute: 'XXXX' +CxxOverrideAttribute: '^.*$' +AnnotateAttribute: 'XXXX' +AsmLabelAttribute: 'XXXX' +PackedAttribute: 'XXXX' +PureAttribute: 'XXXX' +ConstAttribute: 'XXXX' +NoduplicateAttribute: 'XXXX' +PreprocessingDirective: 'XXXX' +MacroDefinition: 'XXXX' +MacroInstantiation: 'XXXX' +InclusionDirective: 'XXXX' +VariableName: + ScopePrefix: + Global: '' + Static: '' + ClassMember: '' + DataTypePrefix: + String: '' + Integer: '' + Bool: '' + Pointer: '' + Pattern: '^.*$' diff --git a/cmake/developer_package/ncc_naming_style/requirements_dev.txt b/cmake/developer_package/ncc_naming_style/requirements_dev.txt new file mode 100644 index 00000000000..b06650ce6ac --- /dev/null +++ b/cmake/developer_package/ncc_naming_style/requirements_dev.txt @@ -0,0 +1,2 @@ +clang==9.0 +pyyaml \ No newline at end of file diff --git a/cmake/developer_package/ncc_naming_style/try_find_clang.cmake b/cmake/developer_package/ncc_naming_style/try_find_clang.cmake new file mode 100644 index 00000000000..70f2bfd0545 --- /dev/null +++ b/cmake/developer_package/ncc_naming_style/try_find_clang.cmake @@ -0,0 +1,8 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +cmake_minimum_required(VERSION 3.13) +project(try_find_clang) + +find_package(Clang QUIET) diff --git a/cmake/developer_package/version.cmake b/cmake/developer_package/version.cmake index 054bc10c78e..cfd3e22e71a 100644 --- a/cmake/developer_package/version.cmake +++ b/cmake/developer_package/version.cmake @@ -42,7 +42,7 @@ macro(ie_parse_ci_build_number) return() endif() - set(ie_version_hpp "${OpenVINO_SOURCE_DIR}/inference-engine/include/ie_version.hpp") + set(ie_version_hpp "${OpenVINO_SOURCE_DIR}/inference-engine/src/inference_engine/include/ie/ie_version.hpp") if(NOT EXISTS ${ie_version_hpp}) message(FATAL_ERROR "File ie_version.hpp with IE_VERSION definitions is not found") endif() diff --git a/cmake/features.cmake b/cmake/features.cmake index b7e23ee9226..26bf48f3824 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -98,7 +98,7 @@ ie_dependent_option (ENABLE_FUNCTIONAL_TESTS "functional tests" ON "ENABLE_TESTS ie_dependent_option (ENABLE_SAMPLES "console samples are part of inference engine package" ON "NOT MINGW" OFF) -ie_dependent_option (ENABLE_SPEECH_DEMO "enable speech demo integration" ON "NOT APPLE;NOT ANDROID;X86 OR X86_64" OFF) +ie_dependent_option (ENABLE_SPEECH_DEMO "enable speech demo integration" ON "NOT APPLE;NOT ANDROID;X86_64" OFF) ie_option (ENABLE_OPENCV "enables OpenCV" ON) @@ -125,14 +125,15 @@ endif() ie_dependent_option(NGRAPH_ONNX_IMPORT_ENABLE "Enable ONNX importer" ON "protoc_available" OFF) ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" OFF "NGRAPH_ONNX_IMPORT_ENABLE" OFF) ie_dependent_option(NGRAPH_PDPD_FRONTEND_ENABLE "Enable PaddlePaddle FrontEnd" ON "protoc_available" OFF) -ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" OFF - "NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF) +ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" ON + "NGRAPH_ONNX_IMPORT_ENABLE" OFF) ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF "NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF) ie_dependent_option(NGRAPH_UNIT_TEST_ENABLE "Enables ngraph unit tests" ON "ENABLE_TESTS;NOT ANDROID" OFF) ie_dependent_option(NGRAPH_UNIT_TEST_BACKENDS_ENABLE "Control the 
building of unit tests using backends" ON "NGRAPH_UNIT_TEST_ENABLE" OFF) option(NGRAPH_DEBUG_ENABLE "Enable output for NGRAPH_DEBUG statements" OFF) +option(ENABLE_REQUIREMENTS_INSTALL "Dynamic dependencies install" ON) # WA for ngraph python build on Windows debug list(REMOVE_ITEM IE_OPTIONS NGRAPH_UNIT_TEST_ENABLE NGRAPH_UNIT_TEST_BACKENDS_ENABLE) diff --git a/cmake/test_model_zoo.cmake b/cmake/test_model_zoo.cmake new file mode 100644 index 00000000000..c3f158626cd --- /dev/null +++ b/cmake/test_model_zoo.cmake @@ -0,0 +1,131 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +function(ov_model_convert SRC DST OUT) + set(onnx_gen_script ${OpenVINO_SOURCE_DIR}/ngraph/test/models/onnx/onnx_prototxt_converter.py) + + file(GLOB_RECURSE prototxt_models RELATIVE "${SRC}" "${SRC}/*.prototxt") + file(GLOB_RECURSE xml_models RELATIVE "${SRC}" "${SRC}/*.xml") + file(GLOB_RECURSE bin_models RELATIVE "${SRC}" "${SRC}/*.bin") + file(GLOB_RECURSE onnx_models RELATIVE "${SRC}" "${SRC}/*.onnx") + file(GLOB_RECURSE data_models RELATIVE "${SRC}" "${SRC}/*.data") + + foreach(in_file IN LISTS prototxt_models xml_models bin_models onnx_models data_models) + get_filename_component(ext "${in_file}" EXT) + get_filename_component(rel_dir "${in_file}" DIRECTORY) + get_filename_component(name_we "${in_file}" NAME_WE) + set(model_source_dir "${SRC}/${rel_dir}") + + if(NOT NGRAPH_ONNX_IMPORT_ENABLE AND ext MATCHES "^\\.(onnx|prototxt)$") + # don't copy / process ONNX / prototxt files + continue() + endif() + + if(ext STREQUAL ".prototxt") + # convert model + set(rel_out_name "${name_we}.onnx") + if(rel_dir) + set(rel_out_name "${rel_dir}/${rel_out_name}") + endif() + else() + # copy as is + set(rel_out_name "${in_file}") + endif() + + set(full_out_name "${DST}/${rel_out_name}") + file(MAKE_DIRECTORY "${DST}/${rel_dir}") + + if(ext STREQUAL ".prototxt") + # convert .prototxt models to .onnx binary + add_custom_command(OUTPUT ${full_out_name} + COMMAND ${PYTHON_EXECUTABLE} ${onnx_gen_script} + "${SRC}/${in_file}" ${full_out_name} + DEPENDS ${onnx_gen_script} "${SRC}/${in_file}" + COMMENT "Generate ${rel_out_name}" + WORKING_DIRECTORY "${model_source_dir}") + else() + add_custom_command(OUTPUT ${full_out_name} + COMMAND "${CMAKE_COMMAND}" -E copy_if_different + "${SRC}/${in_file}" ${full_out_name} + DEPENDS ${onnx_gen_script} "${SRC}/${in_file}" + COMMENT "Copy ${rel_out_name}" + WORKING_DIRECTORY "${model_source_dir}") + endif() + list(APPEND files "${full_out_name}") + endforeach() + + set(${OUT} ${files} PARENT_SCOPE) +endfunction() + +ov_model_convert("${CMAKE_CURRENT_SOURCE_DIR}/ngraph/test" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/ngraph" + onnx_out_files) + +set(rel_path "inference-engine/tests/functional/inference_engine/onnx_reader") +ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_reader" + ie_onnx_out_files) + +set(rel_path "inference-engine/tests/functional/inference_engine/ir_serialization") +ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/ir_serialization" + ie_serialize_out_files) + +set(rel_path "inference-engine/tests/unit/frontends/onnx_import/models") +ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_import" + ie_onnx_import_out_files) + +if(ENABLE_TESTS) + if(NGRAPH_ONNX_IMPORT_ENABLE AND ENABLE_REQUIREMENTS_INSTALL) + find_package(PythonInterp 3 REQUIRED) + + 
get_filename_component(PYTHON_EXEC_DIR ${PYTHON_EXECUTABLE} DIRECTORY) + execute_process(COMMAND "${PYTHON_EXECUTABLE}" -m pip --version + WORKING_DIRECTORY ${PYTHON_EXEC_DIR} + RESULT_VARIABLE pip3_exit_code + OUTPUT_VARIABLE pip3_version) + + if(NOT pip3_exit_code EQUAL 0) + message(FATAL_ERROR "Failed to extract pip module version") + endif() + + if(pip3_version MATCHES ".* ([0-9]+)+\.([0-9]+)([\.0-9 ]).*") + set(pip3_version ${CMAKE_MATCH_1}.${CMAKE_MATCH_2}) + else() + message(FATAL_ERROR "Failed to parse ${pip3_version}") + endif() + + message(STATUS "pip version is ${pip3_version}") + set(args --quiet) + if(pip3_version VERSION_GREATER 20.2.2) + list(APPEND args --use-feature=2020-resolver) + endif() + + set(reqs "${OpenVINO_SOURCE_DIR}/ngraph/test/requirements_test_onnx.txt") + add_custom_target(test_pip_prerequsites ALL + "${PYTHON_EXECUTABLE}" -m pip install ${args} -r ${reqs} + COMMENT "Install requirements_test.txt" + VERBATIM + SOURCES ${reqs}) + endif() + + add_custom_target(test_model_zoo DEPENDS ${onnx_out_files} + ${ie_onnx_out_files} + ${ie_serialize_out_files} + ${ie_onnx_import_out_files}) + + if(TARGET test_pip_prerequsites) + add_dependencies(test_model_zoo test_pip_prerequsites) + endif() + + if (NGRAPH_PDPD_FRONTEND_ENABLE) + add_dependencies(test_model_zoo paddlepaddle_test_models) + endif() + + install(DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo" + DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL) + + set(TEST_MODEL_ZOO "./test_model_zoo" CACHE PATH "Path to test model zoo") +endif() diff --git a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md index 03a58a23387..d5383275ad6 100644 --- a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md +++ b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md @@ -519,3 +519,67 @@ Standard ONNX\* operators: | Upsample | No | | Where | No | | Xor | No | + + +## PaddlePaddle\* Supported Operators + +Standard PaddlePaddle(paddlepaddle>=2.1)\* Operators: + +| Operator Name in PaddlePaddle\*| Limitations| +| :----------| :----------| +| adpative_pool2d | 'NHWC' data_layout is not supported | +| arg_max | 'int32' output data_type is not supported | +| assign_value | No | +| batch_norm | No | +| bilinear_interp | 'NCW' 'NWC' 'NHWC' 'NCDHW' 'NDHWC' data_layout are not supported | +| bilinear_interp_v2 | 'NCW' 'NWC' 'NHWC' 'NCDHW' 'NDHWC' data_layout are not supported | +| bmm | No | +| cast | No | +| clip | No | +| concat | No | +| conv2d | 'NHWC' data_layout is not supported | +| depthwise_conv2d | 'NHWC' data_layout is not supported | +| deformable_conv | No | +| elementwise_add | No | +| elementwise_div | No | +| elementwise_max | No | +| elementwise_min | No | +| elementwise_mul | No | +| elementwise_pow | No | +| elementwise_sub | No | +| equal | No | +| expand_v2 | No | +| fill_constant_batch_size_like | No | +| fill_constant | No | +| flatten_contiguous_range | No | +| greater_equal | No | +| hard_sigmoid | No | +| hard_swish | No | +| leaky_relu | No | +| log | No | +| logical_not | No | +| matmul | No | +| matrix_nms | Only supports IE CPU plugin with 'number of selected boxes' static shape(eg: min(min(num_boxes, nms_top_k) * num_classes_output, keep_top_k)) | +| max_pool2d_with_index | No | +| mul | No | +| multiclass_nms | Only supports IE CPU plugin with 'number of selected boxes' static shape(eg: min(min(num_boxes, nms_top_k) * num_classes_output, keep_top_k)) | +| nearest_interp | 'NCW' 'NWC' 'NHWC' 'NCDHW' 'NDHWC' data_layout are 
not supported | +| nearest_interp_v2 | 'NCW' 'NWC' 'NHWC' 'NCDHW' 'NDHWC' data_layout are not supported | +| pad3d | 'Circular' mode is not supported | +| pow | No | +| pool2d | 'NHWC' data_layout is not supported | +| range | No | +| relu | No | +| relu6 | No | +| reshape2 | No | +| rnn | 'SimpleRNN' and 'GRU' modes are not supported | +| scale | No | +| shape | No | +| slice | No | +| softmax | No | +| sigmoid | No | +| split | No | +| squeeze2 | No | +| transpose2 | No | +| unsqueeze2 | No | +| yolo_box | No | diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md index 6feec5f627a..eabe4840eb8 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md @@ -1,6 +1,7 @@ # Converting TensorFlow* Object Detection API Models {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models} > **NOTES**: +> * Starting with the 2022.1 release, the Model Optimizer can convert the TensorFlow\* Object Detection API Faster and Mask RCNNs topologies differently. By default, the Model Optimizer adds operation "Proposal" to the generated IR. This operation needs an additional input to the model with name "image_info" which should be fed with several values describing the pre-processing applied to the input image (refer to the [Proposal](../../../../ops/detection/Proposal_4.md) operation specification for more information). However, this input is redundant for the models trained and inferred with equal size images. Model Optimizer can generate IR for such models and insert operation [DetectionOutput](../../../../ops/detection/DetectionOutput_1.md) instead of `Proposal`. The `DetectionOutput` operation does not require additional model input "image_info" and moreover, for some models the produced inference results are closer to the original TensorFlow\* model. In order to trigger new behaviour the attribute "operation_to_add" in the corresponding JSON transformation configuration file should be set to value "DetectionOutput" instead of default one "Proposal". > * Starting with the 2021.1 release, the Model Optimizer converts the TensorFlow\* Object Detection API SSDs, Faster and Mask RCNNs topologies keeping shape-calculating sub-graphs by default, so topologies can be re-shaped in the Inference Engine using dedicated reshape API. Refer to [Using Shape Inference](../../../../IE_DG/ShapeInference.md) for more information on how to use this feature. It is possible to change the both spatial dimensions of the input image and batch size. > * To generate IRs for SSD topologies, the Model Optimizer creates a number of `PriorBoxClustered` layers instead of a constant node with prior boxes calculated for the particular input image size. This change allows you to reshape the topology in the Inference Engine using dedicated Inference Engine API. The reshaping is supported for all SSD topologies except FPNs which contain hardcoded shapes for some operations preventing from changing topology input shape. 
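As a sketch of the first note above: switching the generated IR from `Proposal` to `DetectionOutput` comes down to editing the selected JSON transformation configuration file before running the Model Optimizer. The snippet below is illustrative only; it assumes the attribute is exposed under a replacement's `custom_attributes` section and uses `faster_rcnn_support_api_v2.4.json` purely as an example name, so check the actual `*_support_api_*.json` shipped with your release.

```python
import json

# Illustrative file name; pick the *_support_api_*.json matching your topology
# and TensorFlow Object Detection API version.
config_path = "faster_rcnn_support_api_v2.4.json"

with open(config_path) as f:
    replacements = json.load(f)  # the config is a list of replacement descriptions

for replacement in replacements:
    attrs = replacement.get("custom_attributes", {})
    if "operation_to_add" in attrs:                    # assumed location of the attribute
        attrs["operation_to_add"] = "DetectionOutput"  # default value is "Proposal"

with open(config_path, "w") as f:
    json.dump(replacements, f, indent=4)
```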
@@ -29,14 +30,16 @@ To convert a TensorFlow\* Object Detection API model, go to the `/d * `faster_rcnn_support_api_v1.13.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.13.X * `faster_rcnn_support_api_v1.14.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.14.0 up to 1.14.X inclusively * `faster_rcnn_support_api_v1.15.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.15.0 up to 2.0 - * `faster_rcnn_support_api_v2.0.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.0 or higher + * `faster_rcnn_support_api_v2.0.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.0 up to 2.3.X inclusively + * `faster_rcnn_support_api_v2.4.json` --- for Faster R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.4 or higher * `mask_rcnn_support.json` --- for Mask R-CNN topologies from the TF 1.X models zoo trained with TensorFlow\* version 1.9.0 or lower. * `mask_rcnn_support_api_v1.7.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.7.0 up to 1.9.X inclusively * `mask_rcnn_support_api_v1.11.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.11.0 up to 1.12.X inclusively * `mask_rcnn_support_api_v1.13.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.13.0 up to 1.13.X inclusively * `mask_rcnn_support_api_v1.14.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.14.0 up to 1.14.X inclusively * `mask_rcnn_support_api_v1.15.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 1.15.0 up to 2.0 - * `mask_rcnn_support_api_v2.0.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.0 or higher + * `mask_rcnn_support_api_v2.0.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.0 up to 2.3.X inclusively + * `mask_rcnn_support_api_v2.4.json` --- for Mask R-CNN topologies trained using the TensorFlow\* Object Detection API version 2.4 or higher * `rfcn_support.json` --- for RFCN topology from the models zoo trained with TensorFlow\* version up to 1.9.X inclusively * `rfcn_support_api_v1.10.json` --- for RFCN topology from the models zoo frozen with TensorFlow\* version 1.10.0 up to 1.12.X inclusively * `rfcn_support_api_v1.13.json` --- for RFCN topology from the models zoo frozen with TensorFlow\* version 1.13.X diff --git a/docs/doxygen/doxygen-ignore.txt b/docs/doxygen/doxygen-ignore.txt index b1f27a4972c..c2bc8a0825c 100644 --- a/docs/doxygen/doxygen-ignore.txt +++ b/docs/doxygen/doxygen-ignore.txt @@ -16,8 +16,8 @@ openvino/docs/optimization_guide/dldt_optimization_guide.md openvino/docs/IE_DG/ShapeInference.md build/docs/openvino_docs.xml openvino/docs/install_guides/installing-openvino-linux-ivad-vpu.md -inference-engine/include/ie_parallel.hpp -inference-engine/include/ie_plugin_config.hpp -inference-engine/include/vpu/myriad_config.hpp -inference-engine/include/vpu/vpu_config.hpp -inference-engine/include/vpu/vpu_plugin_config.hpp \ No newline at end of file +inference-engine/src/inference_engine/include/ie/ie_parallel.hpp +inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp 
+inference-engine/src/inference_engine/include/ie/vpu/myriad_config.hpp +inference-engine/src/inference_engine/include/ie/vpu/vpu_config.hpp +inference-engine/src/inference_engine/include/ie/vpu/vpu_plugin_config.hpp diff --git a/docs/doxygen/ie_docs.config b/docs/doxygen/ie_docs.config index bbd203c931c..792d001bdcf 100644 --- a/docs/doxygen/ie_docs.config +++ b/docs/doxygen/ie_docs.config @@ -824,7 +824,7 @@ WARN_LOGFILE = "@DOCS_BUILD_DIR@/ie_docs.log" # Note: If this tag is empty the current directory is searched. INPUT = "@DOCS_BUILD_DIR@" \ - "@IE_SOURCE_DIR@/include" + "@IE_SOURCE_DIR@/src/inference_engine/include" # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses diff --git a/docs/ops/arithmetic/Erf_1.md b/docs/ops/arithmetic/Erf_1.md index 6b445dafad2..52d2d0301cb 100644 --- a/docs/ops/arithmetic/Erf_1.md +++ b/docs/ops/arithmetic/Erf_1.md @@ -4,34 +4,32 @@ **Category**: Arithmetic unary operation -**Short description**: *Erf* calculates the Gauss error function element-wise with given tensor. +**Short description**: *Erf* performs element-wise Gauss error function (erf) on a given input tensor. **Detailed Description** -For each element from the input tensor calculates corresponding element in the output tensor with the following formula: +*Erf* performs element-wise erf operation on a given input tensor, based on the following mathematical formula: + \f[ erf(x) = \pi^{-1} \int_{-x}^{x} e^{-t^2} dt \f] -**Attributes**: - - No attributes available. +**Attributes**: *Erf* operation has no attributes. **Inputs** -* **1**: A tensor of type *T*. **Required.** +* **1**: A tensor of type *T* and arbitrary shape. **Required.** **Outputs** -* **1**: The result of element-wise operation. A tensor of type *T*. +* **1**: The result of element-wise *Erf* function applied to the input tensor. A tensor of type *T* and the same shape as the input tensor. **Types** -* *T*: any supported floating-point type. +* *T*: any supported numeric type. -**Examples** -*Example 1* +**Example** ```xml diff --git a/docs/ops/arithmetic/Tan_1.md b/docs/ops/arithmetic/Tan_1.md index 6ea7d1e9a6b..d9086f7ad5f 100644 --- a/docs/ops/arithmetic/Tan_1.md +++ b/docs/ops/arithmetic/Tan_1.md @@ -6,32 +6,39 @@ **Short description**: *Tan* performs element-wise tangent operation with given tensor. -**Attributes**: - - No attributes available. - -**Inputs** - -* **1**: An tensor of type *T*. **Required.** - -**Outputs** - -* **1**: The result of element-wise tan operation. A tensor of type *T*. - -**Types** - -* *T*: any numeric type. - -*Tan* does the following with the input tensor *a*: +**Detailed description**: Operation takes one input tensor and performs the element-wise tangent function on a given input tensor, based on the following mathematical formula: \f[ a_{i} = tan(a_{i}) \f] -**Examples** - *Example 1* + input = [0.0, 0.25, -0.25, 0.5, -0.5] + output = [0.0, 0.25534192, -0.25534192, 0.54630249, -0.54630249] + +*Example 2* + + input = [-2, -1, 0, 1, 2] + output = [2, -2, 0, 2, -2] + +**Attributes**: *tan* operation has no attributes. + +**Inputs** + +* **1**: A tensor of type *T* and arbitrary shape, measured in radians. **Required.** + +**Outputs** + +* **1**: The result of element-wise *tan* applied to the input tensor. A tensor of type *T* and same shape as the input tensor. + +**Types** + +* *T*: any supported numeric type. 
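For reference, the integer outputs in *Example 2* above are consistent with evaluating the tangent in floating point and rounding to the nearest integer for integral types (an assumption inferred from the listed values, not a statement of the exact reference behaviour):

\f[
\tan(-2) \approx 2.185 \rightarrow 2,\quad \tan(-1) \approx -1.557 \rightarrow -2,\quad \tan(0) = 0,\quad \tan(1) \approx 1.557 \rightarrow 2,\quad \tan(2) \approx -2.185 \rightarrow -2
\f]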
+ + +**Examples** + ```xml diff --git a/docs/ops/comparison/LessEqual_1.md b/docs/ops/comparison/LessEqual_1.md index 4144095bed4..a8b7c810181 100644 --- a/docs/ops/comparison/LessEqual_1.md +++ b/docs/ops/comparison/LessEqual_1.md @@ -4,32 +4,7 @@ **Category**: Comparison binary operation -**Short description**: *LessEqual* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules. - -**Attributes**: - -* *auto_broadcast* - - * **Description**: specifies rules used for auto-broadcasting of input tensors. - * **Range of values**: - * *none* - no auto-broadcasting is allowed, all input shapes should match - * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in ONNX docs. - * **Type**: string - * **Default value**: "numpy" - * **Required**: *no* - -**Inputs** - -* **1**: A tensor of type *T*. **Required.** -* **2**: A tensor of type *T*. **Required.** - -**Outputs** - -* **1**: The result of element-wise comparison operation. A tensor of type boolean. - -**Types** - -* *T*: arbitrary supported type. +**Short description**: *LessEqual* performs element-wise comparison operation with two given tensors applying broadcast rules specified in the *auto_broadcast* attribute. **Detailed description** Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. @@ -40,12 +15,39 @@ After broadcasting *LessEqual* does the following with the input tensors *a* and o_{i} = a_{i} <= b_{i} \f] +**Attributes**: + +* *auto_broadcast* + + * **Description**: specifies rules used for auto-broadcasting of input tensors. + * **Range of values**: + * *none* - no auto-broadcasting is allowed, all input shapes should match, + * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md), + * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md). + * **Type**: string + * **Default value**: "numpy" + * **Required**: *no* + +**Inputs** + +* **1**: A tensor of type *T* and arbitrary shape. **Required.** +* **2**: A tensor of type *T* and arbitrary shape. **Required.** + +**Outputs** + +* **1**: The result of element-wise comparison operation applied to the input tensors. A tensor of type **boolean** and shape equal to broadcasted shape of two inputs. + +**Types** + +* *T*: arbitrary supported type. + **Examples** -*Example 1* +*Example 1: no broadcast* ```xml + 256 @@ -65,9 +67,10 @@ o_{i} = a_{i} <= b_{i} ``` -*Example 2: broadcast* +*Example 2: numpy broadcast* ```xml + 8 diff --git a/docs/ops/comparison/Less_1.md b/docs/ops/comparison/Less_1.md index 79a154a6c57..dcf210d6579 100644 --- a/docs/ops/comparison/Less_1.md +++ b/docs/ops/comparison/Less_1.md @@ -6,6 +6,16 @@ **Short description**: *Less* performs element-wise comparison operation with two given tensors applying multi-directional broadcast rules. +**Detailed description** +Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. 
+ +After broadcasting *Less* does the following with the input tensors *a* and *b*: + +\f[ +o_{i} = a_{i} < b_{i} +\f] + + **Attributes**: * *auto_broadcast* @@ -13,8 +23,9 @@ * **Description**: specifies rules used for auto-broadcasting of input tensors. * **Range of values**: * *none* - no auto-broadcasting is allowed, all input shapes should match - * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in ONNX docs. - * **Type**: string + * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md) + * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md) + * **Type**: `string` * **Default value**: "numpy" * **Required**: *no* @@ -31,15 +42,6 @@ * *T*: arbitrary supported type. -**Detailed description** -Before performing arithmetic operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. - -After broadcasting *Less* does the following with the input tensors *a* and *b*: - -\f[ -o_{i} = a_{i} < b_{i} -\f] - **Examples** *Example 1* diff --git a/docs/ops/logical/LogicalAnd_1.md b/docs/ops/logical/LogicalAnd_1.md index 4f39b236fef..a653d1abbc2 100644 --- a/docs/ops/logical/LogicalAnd_1.md +++ b/docs/ops/logical/LogicalAnd_1.md @@ -6,39 +6,40 @@ **Short description**: *LogicalAnd* performs element-wise logical AND operation with two given tensors applying multi-directional broadcast rules. +**Detailed description**: Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. + +After broadcasting *LogicalAnd* does the following with the input tensors *a* and *b*: + +\f[ +o_{i} = a_{i} \wedge b_{i} +\f] + **Attributes**: * *auto_broadcast* * **Description**: specifies rules used for auto-broadcasting of input tensors. * **Range of values**: - * *none* - no auto-broadcasting is allowed, all input shapes should match - * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in ONNX docs. + * *none* - no auto-broadcasting is allowed, all input shapes must match, + * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md), + * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md). * **Type**: string * **Default value**: "numpy" * **Required**: *no* **Inputs** -* **1**: A tensor of type *T*. **Required.** -* **2**: A tensor of type *T*. **Required.** +* **1**: A tensor of type *T* and arbitrary shape. **Required.** +* **2**: A tensor of type *T* and arbitrary shape. **Required.** **Outputs** -* **1**: The result of element-wise logical AND operation. A tensor of type boolean. +* **1**: The result of element-wise *LogicalAnd* operation. A tensor of type boolean. **Types** * *T*: boolean type. -**Detailed description** -Before performing logical operation, input tensors *a* and *b* are broadcasted if their shapes are different and `auto_broadcast` attributes is not `none`. Broadcasting is performed according to `auto_broadcast` value. 
- -After broadcasting *LogicalAnd* does the following with the input tensors *a* and *b*: - -\f[ -o_{i} = a_{i} and b_{i} -\f] **Examples** diff --git a/docs/template_extension/cpu_kernel.cpp b/docs/template_extension/cpu_kernel.cpp index aa2486589cb..b1d426b1582 100644 --- a/docs/template_extension/cpu_kernel.cpp +++ b/docs/template_extension/cpu_kernel.cpp @@ -102,6 +102,7 @@ InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig& IE_THROW() << "Operation supports only FP32 precisions!"; } } catch (InferenceEngine::Exception& ex) { + error = ex.what(); if (resp) { strncpy(resp->msg, error.c_str(), sizeof(resp->msg) - 1); resp->msg[sizeof(resp->msg) - 1] = 0; diff --git a/docs/template_extension/fft_kernel.cpp b/docs/template_extension/fft_kernel.cpp index 12554a70c75..3fcf71a8f64 100644 --- a/docs/template_extension/fft_kernel.cpp +++ b/docs/template_extension/fft_kernel.cpp @@ -66,6 +66,7 @@ InferenceEngine::StatusCode FFTImpl::init(InferenceEngine::LayerConfig& config, IE_THROW() << "Operation supports only FP32 precisions!"; } } catch (InferenceEngine::Exception& ex) { + error = ex.what(); if (resp) { strncpy(resp->msg, error.c_str(), sizeof(resp->msg) - 1); resp->msg[sizeof(resp->msg) - 1] = 0; diff --git a/docs/template_plugin/tests/functional/op_reference/comparison.hpp b/docs/template_plugin/tests/functional/op_reference/comparison.hpp new file mode 100644 index 00000000000..0d520b73ba2 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/comparison.hpp @@ -0,0 +1,63 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" +#include "ngraph_functions/builders.hpp" + +namespace reference_tests { +namespace ComparisonOpsRefTestDefinitions { + +struct RefComparisonParams { + ngraph::helpers::ComparisonTypes compType; + Tensor input1; + Tensor input2; + Tensor expected; +}; + +struct Builder : ParamsBuilder { + REFERENCE_TESTS_ADD_SET_PARAM(Builder, compType); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, input1); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, input2); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, expected); +}; + +class ReferenceComparisonLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + const auto& params = GetParam(); + function = CreateFunction(params.compType, params.input1.shape, params.input2.shape, params.input1.type, params.expected.type); + inputData = {params.input1.data, params.input2.data}; + refOutData = {params.expected.data}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + const auto& param = obj.param; + std::ostringstream result; + result << "comparisonType=" << param.compType << "_"; + result << "inpt_shape1=" << param.input1.shape << "_"; + result << "inpt_shape2=" << param.input2.shape << "_"; + result << "iType=" << param.input1.type << "_"; + result << "oType=" << param.expected.type; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(ngraph::helpers::ComparisonTypes comp_op_type, const ngraph::PartialShape& input_shape1, + const ngraph::PartialShape& input_shape2, const ngraph::element::Type& input_type, + const ngraph::element::Type& expected_output_type) { + const auto in = std::make_shared(input_type, input_shape1); + const auto in2 = std::make_shared(input_type, input_shape2); + const auto comp = ngraph::builder::makeComparison(in, in2, comp_op_type); + return 
std::make_shared(ngraph::NodeVector {comp}, ngraph::ParameterVector {in, in2}); + } +}; +} // namespace ComparisonOpsRefTestDefinitions +} // namespace reference_tests \ No newline at end of file diff --git a/docs/template_plugin/tests/functional/op_reference/conversion.cpp b/docs/template_plugin/tests/functional/op_reference/conversion.cpp new file mode 100644 index 00000000000..36f616cdaae --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/conversion.cpp @@ -0,0 +1,15 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "conversion.hpp" + +namespace reference_tests { +namespace ConversionOpsRefTestDefinitions { +namespace { +TEST_P(ReferenceConversionLayerTest, CompareWithHardcodedRefs) { + Exec(); +} +} // namespace +} // namespace ConversionOpsRefTestDefinitions +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/conversion.hpp b/docs/template_plugin/tests/functional/op_reference/conversion.hpp new file mode 100644 index 00000000000..f3846c7eab6 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/conversion.hpp @@ -0,0 +1,67 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" +#include "ngraph_functions/builders.hpp" + +namespace reference_tests { +namespace ConversionOpsRefTestDefinitions { + +static std::map conversionNames = { + {ngraph::helpers::ConversionTypes::CONVERT, "Convert"}, + {ngraph::helpers::ConversionTypes::CONVERT_LIKE, "ConvertLike"} +}; + +struct ConvertParams { + template + ConvertParams(ngraph::helpers::ConversionTypes convType, const ngraph::PartialShape& shape, const ngraph::element::Type& iType, + const ngraph::element::Type& oType, const std::vector& iValues, const std::vector& oValues, size_t iSize = 0, size_t oSize = 0) + : conversionType(convType), pshape(shape), inType(iType), outType(oType), inputData(CreateBlob(iType, iValues, iSize)), + refData(CreateBlob(oType, oValues, oSize)) {} + ngraph::helpers::ConversionTypes conversionType; + ngraph::PartialShape pshape; + ngraph::element::Type inType; + ngraph::element::Type outType; + InferenceEngine::Blob::Ptr inputData; + InferenceEngine::Blob::Ptr refData; +}; + +class ReferenceConversionLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + const auto& params = GetParam(); + function = CreateFunction(params.pshape, params.inType, params.outType, params.conversionType); + inputData = {params.inputData}; + refOutData = {params.refData}; + } + + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + const auto& param = obj.param; + std::ostringstream result; + result << "convertionType=" << conversionNames[param.conversionType] << "_"; + result << "shape=" << param.pshape << "_"; + result << "iType=" << param.inType << "_"; + result << "oType=" << param.outType; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const ngraph::PartialShape& input_shape, const ngraph::element::Type& input_type, + const ngraph::element::Type& expected_output_type, + const ngraph::helpers::ConversionTypes& conversion_type) { + const auto in = std::make_shared(input_type, input_shape); + const auto convert = ngraph::builder::makeConversion(in, expected_output_type, conversion_type); + return std::make_shared(ngraph::NodeVector {convert}, ngraph::ParameterVector {in}); + 
} +}; +} // namespace ConversionOpsRefTestDefinitions +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/convert.cpp b/docs/template_plugin/tests/functional/op_reference/convert.cpp index b8e6f5846f7..68834b3a576 100644 --- a/docs/template_plugin/tests/functional/op_reference/convert.cpp +++ b/docs/template_plugin/tests/functional/op_reference/convert.cpp @@ -10,433 +10,403 @@ #include #include -#include "base_reference_test.hpp" +#include "conversion.hpp" -using namespace reference_tests; using namespace ngraph; using namespace InferenceEngine; +using ConversionTypes = ngraph::helpers::ConversionTypes; -struct ConvertParams { - template - ConvertParams(const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const ngraph::element::Type& oType, const std::vector& iValues, - const std::vector& oValues, size_t iSize = 0, size_t oSize = 0) - : pshape(shape), inType(iType), outType(oType), inputData(CreateBlob(iType, iValues, iSize)), refData(CreateBlob(oType, oValues, oSize)) {} - ngraph::PartialShape pshape; - ngraph::element::Type inType; - ngraph::element::Type outType; - InferenceEngine::Blob::Ptr inputData; - InferenceEngine::Blob::Ptr refData; -}; - -class ReferenceConvertLayerTest : public testing::TestWithParam, public CommonReferenceTest { -public: - void SetUp() override { - auto params = GetParam(); - function = CreateFunction(params.pshape, params.inType, params.outType); - inputData = {params.inputData}; - refOutData = {params.refData}; - } - static std::string getTestCaseName(const testing::TestParamInfo& obj) { - auto param = obj.param; - std::ostringstream result; - result << "shape=" << param.pshape << "_"; - result << "iType=" << param.inType << "_"; - result << "oType=" << param.outType; - return result.str(); - } - -private: - static std::shared_ptr CreateFunction(const PartialShape& input_shape, const element::Type& input_type, - const element::Type& expected_output_type) { - const auto in = std::make_shared(input_type, input_shape); - const auto convert = std::make_shared(in, expected_output_type); - return std::make_shared(NodeVector {convert}, ParameterVector {in}); - } -}; - -TEST_P(ReferenceConvertLayerTest, CompareWithHardcodedRefs) { - Exec(); -} +namespace reference_tests { +namespace ConversionOpsRefTestDefinitions { +namespace { INSTANTIATE_TEST_SUITE_P( - smoke_Convert_With_Hardcoded_Refs, ReferenceConvertLayerTest, + smoke_Conversion_With_Hardcoded_Refs, ReferenceConversionLayerTest, ::testing::Values( // destination boolean - ConvertParams(ngraph::PartialShape {2, 3}, ngraph::element::u8, ngraph::element::boolean, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 3}, ngraph::element::u8, ngraph::element::boolean, std::vector {0, 12, 23, 0, std::numeric_limits::lowest(), std::numeric_limits::max()}, std::vector {0, 1, 1, 0, 0, 1}), - ConvertParams(ngraph::PartialShape {2, 3}, ngraph::element::i32, ngraph::element::boolean, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 3}, ngraph::element::i32, ngraph::element::boolean, std::vector {0, -12, 23, 0, std::numeric_limits::lowest(), std::numeric_limits::max()}, std::vector {0, 1, 1, 0, 1, 1}), - ConvertParams(ngraph::PartialShape {3, 3}, ngraph::element::f32, ngraph::element::boolean, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {3, 3}, ngraph::element::f32, ngraph::element::boolean, std::vector {0.f, 1.5745f, 0.12352f, 0.f, std::numeric_limits::lowest(), std::numeric_limits::max(), 
std::numeric_limits::min(), std::numeric_limits::infinity(), -std::numeric_limits::infinity()}, std::vector {0, 1, 1, 0, 1, 1, 1, 1, 1}), - // destination bf16 - ConvertParams(ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::bf16, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::bf16, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), - ConvertParams(ngraph::PartialShape {11}, ngraph::element::u8, ngraph::element::bf16, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {11}, ngraph::element::u8, ngraph::element::bf16, std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}, std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}), // destination f16 - ConvertParams(ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::f16, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::f16, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), - ConvertParams(ngraph::PartialShape {11}, ngraph::element::u8, ngraph::element::f16, std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}, - std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {11}, ngraph::element::u8, ngraph::element::f16, + std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}, std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}), // destination f32 - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::u1, ngraph::element::f32, std::vector {0xA0}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::u1, ngraph::element::f32, std::vector {0xA0}, std::vector {1.0f, 0.0f, 1.0f, 0.0f}, 4), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::u4, ngraph::element::f32, std::vector {0xFB, 0x0A}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::u4, ngraph::element::f32, std::vector {0xFB, 0x0A}, std::vector {15.0f, 11.0f, 0.0f, 10.0f}, 4), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::u8, ngraph::element::f32, std::vector {255, 128, 32, 0}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::u8, ngraph::element::f32, std::vector {255, 128, 32, 0}, std::vector {255.0f, 128.0f, 32.0f, 0.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::u16, ngraph::element::f32, std::vector {64000, 32000, 128, 0}, - std::vector {64000.0f, 32000.0f, 128.0f, 0.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::u32, ngraph::element::f32, std::vector {4000000, 2000000, 128, 0}, - std::vector {4000000.0f, 2000000.0f, 128.0f, 0.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::u64, ngraph::element::f32, std::vector {4000000, 2000000, 128, 0}, - std::vector {4000000.0f, 2000000.0f, 128.0f, 0.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::i4, ngraph::element::f32, std::vector {0xFE, 0xF2}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::u16, ngraph::element::f32, + std::vector {64000, 32000, 128, 0}, std::vector {64000.0f, 32000.0f, 
128.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::u32, ngraph::element::f32, + std::vector {4000000, 2000000, 128, 0}, std::vector {4000000.0f, 2000000.0f, 128.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::u64, ngraph::element::f32, + std::vector {4000000, 2000000, 128, 0}, std::vector {4000000.0f, 2000000.0f, 128.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::i4, ngraph::element::f32, std::vector {0xFE, 0xF2}, std::vector {-1.0f, -2.0f, -1.0f, 2.0f}, 4), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::i8, ngraph::element::f32, std::vector {-127, -0, 0, 127}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::i8, ngraph::element::f32, std::vector {-127, -0, 0, 127}, std::vector {-127.0f, -0.0f, 0.0f, 127.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::i16, ngraph::element::f32, std::vector {-32000, -0, 0, 32000}, - std::vector {-32000.0f, -0.0f, 0.0f, 32000.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::i32, ngraph::element::f32, std::vector {-64000, -0, 0, 64000}, - std::vector {-64000.0f, -0.0f, 0.0f, 64000.0f}), - ConvertParams(ngraph::PartialShape {2, 2}, ngraph::element::i64, ngraph::element::f32, std::vector {-64000, -0, 0, 64000}, - std::vector {-64000.0f, -0.0f, 0.0f, 64000.0f}), - ConvertParams(ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::bf16, ngraph::element::f32, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::i16, ngraph::element::f32, + std::vector {-32000, -0, 0, 32000}, std::vector {-32000.0f, -0.0f, 0.0f, 32000.0f}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::i32, ngraph::element::f32, + std::vector {-64000, -0, 0, 64000}, std::vector {-64000.0f, -0.0f, 0.0f, 64000.0f}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {2, 2}, ngraph::element::i64, ngraph::element::f32, + std::vector {-64000, -0, 0, 64000}, std::vector {-64000.0f, -0.0f, 0.0f, 64000.0f}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::bf16, ngraph::element::f32, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), - ConvertParams(ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f16, ngraph::element::f32, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f16, ngraph::element::f32, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), - ConvertParams(ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::f32, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::f32, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), // destination i4 - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u1, ngraph::element::i4, std::vector {0xA0}, std::vector {0x10, 0x10}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, 
ngraph::element::u4, ngraph::element::i4, std::vector {0x12, 0x03}, std::vector {0x12, 0x03}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i4, std::vector {1, 2, 0, 3}, std::vector {0x12, 0x03}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i4, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u1, ngraph::element::i4, std::vector {0xA0}, + std::vector {0x10, 0x10}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i4, std::vector {0x12, 0x03}, std::vector {0x12, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i4, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i4, std::vector {1, 2, 0, 3}, std::vector {0x12, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i4, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i4, std::vector {1, 2, 0, 3}, std::vector {0x12, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i4, std::vector {0xFE, 0x03}, std::vector {0xFE, 0x03}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i4, std::vector {-1, -2, 2, 3}, std::vector {0xFE, 0x23}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i4, std::vector {-1, -2, 2, 3}, - std::vector {0xFE, 0x23}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i4, std::vector {-1, -2, 2, 3}, - std::vector {0xFE, 0x23}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i4, std::vector {-1, -2, 2, 3}, - std::vector {0xFE, 0x23}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i4, std::vector {-1, -2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i4, std::vector {0xFE, 0x03}, std::vector {0xFE, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i4, std::vector {-1, -2, 0, 3}, - std::vector {0xFE, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i4, std::vector {-1, -2, 2, 3}, std::vector {0xFE, 0x23}, - 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i4, 
std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), // destination i8 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i8, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i8, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i8, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i8, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i8, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i8, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i8, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i8, 
std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i8, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i8, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i8, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i8, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i8, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i8, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), // destination i16 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i16, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i16, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i16, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i16, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i16, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i16, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i16, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + 
ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i16, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i16, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i16, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i16, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i16, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i16, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i16, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i16, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), // destination i32 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i32, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i32, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i32, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i32, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i32, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, 
ngraph::element::i8, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i32, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i32, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i32, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i32, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i32, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i32, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i32, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i32, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i32, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i32, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), // destination i64 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i64, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i64, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i64, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i64, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, 
ngraph::element::i64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i64, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i64, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i64, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i64, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i64, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i64, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i64, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i64, std::vector {-1, -2, 0, 3}, - std::vector {-1, -2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i64, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i64, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i64, std::vector {-1, -2, 2, 3}, std::vector {-1, -2, 2, 3}), // destination u1 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u1, std::vector {0xA0}, std::vector {0xA0}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u4, ngraph::element::u1, std::vector {0x10, 0x01, 0x00, 0x00}, - std::vector {0x90}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u8, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u16, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u32, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u64, ngraph::element::u1, 
std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::i4, ngraph::element::u1, std::vector {0x10, 0x01, 0x00, 0x00}, - std::vector {0x90}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::i8, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::i16, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::i32, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::i64, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::f16, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::bf16, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), - ConvertParams(ngraph::PartialShape {8}, ngraph::element::f32, ngraph::element::u1, std::vector {1, 0, 1, 0, 0, 0, 0, 1}, - std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u1, std::vector {0xA0}, + std::vector {0xA0}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u4, ngraph::element::u1, + std::vector {0x10, 0x01, 0x00, 0x00}, std::vector {0x90}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u8, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u32, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u64, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::i4, ngraph::element::u1, + std::vector {0x10, 0x01, 0x00, 0x00}, std::vector {0x90}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::i8, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::i16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::i32, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::i64, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::f16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::bf16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, 
std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::f32, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), // destination u4 - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u1, ngraph::element::u4, std::vector {0xA0}, std::vector {0x10, 0x10}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u4, std::vector {0x12, 0x03}, std::vector {0x12, 0x03}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u4, std::vector {1, 2, 0, 3}, std::vector {0x12, 0x03}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u4, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u1, ngraph::element::u4, std::vector {0xA0}, + std::vector {0x10, 0x10}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u4, std::vector {0x12, 0x03}, std::vector {0x12, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u4, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u4, std::vector {1, 2, 0, 3}, std::vector {0x12, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u4, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u4, std::vector {1, 2, 0, 3}, std::vector {0x12, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u4, std::vector {0xFE, 0x03}, std::vector {0xFE, 0x03}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u4, std::vector {-1, -2, 2, 3}, std::vector {0xFE, 0x23}, - 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u4, std::vector {-1, -2, 2, 3}, - std::vector {0xFE, 0x23}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u4, std::vector {-1, -2, 2, 3}, - std::vector {0xFE, 0x23}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u4, std::vector {-1, -2, 2, 3}, - std::vector {0xFE, 0x23}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u4, std::vector {-1, -2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u4, std::vector {0xFE, 0x03}, std::vector {0xFE, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u4, std::vector {-1, -2, 0, 3}, - std::vector {0xFE, 0x03}, 4, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u4, std::vector {-1, -2, 2, 3}, std::vector {0xFE, 0x23}, - 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape 
{4}, ngraph::element::i16, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), // destination u8 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u8, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u8, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u8, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u8, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, - 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u8, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u8, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u8, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u8, std::vector {1, 2, 2, 3}, + 
ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u8, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u8, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u8, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u8, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u8, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u8, std::vector {1, 2, 0, 3}, - std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u8, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u8, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), // destination u16 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u16, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u16, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u16, std::vector {0x21, 0x43}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u16, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u16, std::vector {0x21, 0x43}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u16, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u16, std::vector {1, 2, 
2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u16, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u16, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u16, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u16, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u16, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u16, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u16, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u16, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u16, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u16, std::vector {1, 2, 0, 3}, - std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u16, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u16, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u16, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), // destination u32 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u32, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u32, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u32, std::vector {0x21, 0x43}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u32, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - 
ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u32, std::vector {0x21, 0x43}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u32, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u32, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u32, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u32, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u32, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u32, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u32, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u32, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u32, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u32, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u32, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u32, std::vector {1, 2, 0, 3}, - std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u32, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u32, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u32, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - // destination u64 - ConvertParams(ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u64, std::vector {0x81}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u64, std::vector {0x81}, std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u64, std::vector {0x21, 0x43}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u64, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, 
ngraph::element::u32, ngraph::element::u64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u64, std::vector {0x21, 0x43}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u64, std::vector {0x21, 0x43}, std::vector {2, 1, 4, 3}, 4), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u64, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u64, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u64, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u64, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u64, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u64, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u64, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u64, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u64, std::vector {1, 2, 0, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u64, std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u64, std::vector {1, 2, 0, 3}, - std::vector {1, 2, 0, 3}), - ConvertParams(ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u64, std::vector {1, 2, 2, 3}, + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u64, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u64, std::vector {1, 2, 2, 3}, std::vector {1, 2, 2, 3})), - ReferenceConvertLayerTest::getTestCaseName); + ReferenceConversionLayerTest::getTestCaseName); +} // namespace +} // namespace ConversionOpsRefTestDefinitions +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/convert_like.cpp b/docs/template_plugin/tests/functional/op_reference/convert_like.cpp new file mode 100644 index 00000000000..cd745c4040a --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/convert_like.cpp @@ -0,0 +1,413 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "conversion.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using ConversionTypes = ngraph::helpers::ConversionTypes; + +namespace reference_tests { +namespace ConversionOpsRefTestDefinitions { +namespace { + +INSTANTIATE_TEST_SUITE_P( + 
smoke_Conversion_With_Hardcoded_Refs, ReferenceConversionLayerTest, + ::testing::Values( + // destination boolean + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 3}, ngraph::element::u8, ngraph::element::boolean, + std::vector {0, 12, 23, 0, std::numeric_limits::lowest(), std::numeric_limits::max()}, + std::vector {0, 1, 1, 0, 0, 1}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 3}, ngraph::element::i32, ngraph::element::boolean, + std::vector {0, -12, 23, 0, std::numeric_limits::lowest(), std::numeric_limits::max()}, + std::vector {0, 1, 1, 0, 1, 1}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {3, 3}, ngraph::element::f32, ngraph::element::boolean, + std::vector {0.f, 1.5745f, 0.12352f, 0.f, std::numeric_limits::lowest(), std::numeric_limits::max(), + std::numeric_limits::min(), std::numeric_limits::infinity(), -std::numeric_limits::infinity()}, + std::vector {0, 1, 1, 0, 1, 1, 1, 1, 1}), + // destination bf16 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::bf16, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {11}, ngraph::element::u8, ngraph::element::bf16, + std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}, + std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}), + + // destination f16 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::f16, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {11}, ngraph::element::u8, ngraph::element::f16, + std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}, + std::vector {0, 10, 15, 20, 43, 56, 78, 99, 102, 130, 142}), + + // destination f32 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::u1, ngraph::element::f32, + std::vector {0xA0}, std::vector {1.0f, 0.0f, 1.0f, 0.0f}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::u4, ngraph::element::f32, + std::vector {0xFB, 0x0A}, std::vector {15.0f, 11.0f, 0.0f, 10.0f}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::u8, ngraph::element::f32, + std::vector {255, 128, 32, 0}, std::vector {255.0f, 128.0f, 32.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::u16, ngraph::element::f32, + std::vector {64000, 32000, 128, 0}, std::vector {64000.0f, 32000.0f, 128.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::u32, ngraph::element::f32, + std::vector {4000000, 2000000, 128, 0}, std::vector {4000000.0f, 2000000.0f, 128.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::u64, ngraph::element::f32, + std::vector {4000000, 2000000, 128, 0}, std::vector {4000000.0f, 2000000.0f, 128.0f, 0.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::i4, ngraph::element::f32, + std::vector 
{0xFE, 0xF2}, std::vector {-1.0f, -2.0f, -1.0f, 2.0f}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::i8, ngraph::element::f32, + std::vector {-127, -0, 0, 127}, std::vector {-127.0f, -0.0f, 0.0f, 127.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::i16, ngraph::element::f32, + std::vector {-32000, -0, 0, 32000}, std::vector {-32000.0f, -0.0f, 0.0f, 32000.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::i32, ngraph::element::f32, + std::vector {-64000, -0, 0, 64000}, std::vector {-64000.0f, -0.0f, 0.0f, 64000.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {2, 2}, ngraph::element::i64, ngraph::element::f32, + std::vector {-64000, -0, 0, 64000}, std::vector {-64000.0f, -0.0f, 0.0f, 64000.0f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::bf16, ngraph::element::f32, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f16, ngraph::element::f32, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {1, 1, 3, 5}, ngraph::element::f32, ngraph::element::f32, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}, + std::vector {0.5f, 1.5f, 0.5f, 2.5f, 1.5f, 0.5f, 3.5f, 2.5f, 0.5f, 0.5f, 2.5f, 0.5f, 0.5f, 0.5f, 1.5f}), + + // destination i4 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u1, ngraph::element::i4, std::vector {0xA0}, + std::vector {0x10, 0x10}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i4, std::vector {0x12, 0x03}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i4, std::vector {0xFE, 0x03}, + std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, 
ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + // destination i8 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i8, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i8, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i8, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i8, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + // destination i16 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i16, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, 
ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i16, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i16, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i16, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i16, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i16, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + // destination i32 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i32, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i32, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i32, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, 
ngraph::element::i32, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i32, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i32, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i32, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + // destination i64 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::i64, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::i64, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::i64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::i64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::i64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::i64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::i64, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::i64, std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::i64, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::i64, + std::vector {-1, -2, 0, 3}, std::vector {-1, -2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::i64, 
std::vector {-1, -2, 2, 3}, + std::vector {-1, -2, 2, 3}), + + // destination u1 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u1, std::vector {0xA0}, + std::vector {0xA0}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u4, ngraph::element::u1, + std::vector {0x10, 0x01, 0x00, 0x00}, std::vector {0x90}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u8, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u32, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u64, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::i4, ngraph::element::u1, + std::vector {0x10, 0x01, 0x00, 0x00}, std::vector {0x90}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::i8, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::i16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::i32, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::i64, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::f16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::bf16, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::f32, ngraph::element::u1, + std::vector {1, 0, 1, 0, 0, 0, 0, 1}, std::vector {0xA1}, 8, 8), + + // destination u4 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u1, ngraph::element::u4, std::vector {0xA0}, + std::vector {0x10, 0x10}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u4, std::vector {0x12, 0x03}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + 
ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u4, std::vector {1, 2, 0, 3}, + std::vector {0x12, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u4, std::vector {0xFE, 0x03}, + std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u4, + std::vector {-1, -2, 0, 3}, std::vector {0xFE, 0x03}, 4, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u4, std::vector {-1, -2, 2, 3}, + std::vector {0xFE, 0x23}, 4, 4), + + // destination u8 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u8, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u8, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u8, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + 
ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u8, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u8, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u8, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + + // destination u16 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u16, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u16, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u16, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u16, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u16, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u16, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u16, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u16, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u16, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u16, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u16, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + + // destination u32 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u32, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u32, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, 
ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u32, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u32, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u32, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u32, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u32, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u32, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u32, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u32, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u32, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + // destination u64 + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {8}, ngraph::element::u1, ngraph::element::u64, std::vector {0x81}, + std::vector {1, 0, 0, 0, 0, 0, 0, 1}, 8), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u4, ngraph::element::u64, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u8, ngraph::element::u64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u16, ngraph::element::u64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u32, ngraph::element::u64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::u64, ngraph::element::u64, std::vector {1, 2, 0, 3}, + std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i4, ngraph::element::u64, std::vector {0x21, 0x43}, + std::vector {2, 1, 4, 3}, 4), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i8, ngraph::element::u64, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i16, ngraph::element::u64, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i32, ngraph::element::u64, std::vector 
{1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::i64, ngraph::element::u64, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f16, ngraph::element::u64, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::bf16, ngraph::element::u64, + std::vector {1, 2, 0, 3}, std::vector {1, 2, 0, 3}), + ConvertParams(ConversionTypes::CONVERT_LIKE, ngraph::PartialShape {4}, ngraph::element::f32, ngraph::element::u64, std::vector {1, 2, 2, 3}, + std::vector {1, 2, 2, 3})), + ReferenceConversionLayerTest::getTestCaseName); +} // namespace +} // namespace ConversionOpsRefTestDefinitions +} // namespace reference_tests diff --git a/docs/template_plugin/tests/functional/op_reference/equal.cpp b/docs/template_plugin/tests/functional/op_reference/equal.cpp new file mode 100644 index 00000000000..d80ec3271fb --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/equal.cpp @@ -0,0 +1,84 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include + +#include "comparison.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using ComparisonTypes = ngraph::helpers::ComparisonTypes; + + +namespace reference_tests { +namespace ComparisonOpsRefTestDefinitions { +namespace { + +TEST_P(ReferenceComparisonLayerTest, EqualCompareWithHardcodedRefs) { + Exec(); +} + +template +std::vector generateComparisonParams(const element::Type& type) { + using T = typename element_type_traits::value_type; + std::vector compParams { + // 1D // 2D // 3D // 4D + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{2, 2}, type, std::vector {0, 12, 23, 0}}) + .input2({{2, 2}, type, std::vector {0, 12, 23, 0}}) + .expected({{2, 2}, element::boolean, std::vector {1, 1, 1, 1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{2, 3}, type, std::vector {0, 6, 45, 1, 21, 21}}) + .input2({{2, 3}, type, std::vector {1, 18, 23, 1, 19, 21}}) + .expected({{2, 3}, element::boolean, std::vector {0, 0, 0, 1, 0, 1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{1}, type, std::vector {53}}) + .input2({{1}, type, std::vector {53}}) + .expected({{1}, element::boolean, std::vector {1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{2, 4}, type, std::vector {0, 12, 23, 0, 1, 5, 11, 8}}) + .input2({{2, 4}, type, std::vector {0, 12, 23, 0, 10, 5, 11, 8}}) + .expected({{2, 4}, element::boolean, std::vector {1, 1, 1, 1, 0, 1, 1, 1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{3, 1, 2}, type, std::vector {2, 1, 4, 1, 3, 1}}) + .input2({{1, 2, 1}, type, std::vector {1, 1}}) + .expected({{3, 2, 2}, element::boolean, std::vector {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}}), + Builder {} + .compType(ComparisonTypes::EQUAL) + .input1({{2, 1, 2, 1}, type, std::vector {2, 1, 4, 1}}) + .input2({{1, 2, 1}, type, std::vector {1, 1}}) + .expected({{2, 1, 2, 1}, element::boolean, std::vector {0, 1, 0, 1}})}; + return compParams; +} + +std::vector generateComparisonCombinedParams() { + const std::vector> compTypeParams { + generateComparisonParams(element::f32), + generateComparisonParams(element::f16), + generateComparisonParams(element::i32), + generateComparisonParams(element::u32), + generateComparisonParams(element::boolean)}; + 
std::vector combinedParams;
+
+    for (const auto& params : compTypeParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
+                         ReferenceComparisonLayerTest::getTestCaseName);
+} // namespace
+} // namespace ComparisonOpsRefTestDefinitions
+} // namespace reference_tests
\ No newline at end of file
diff --git a/docs/template_plugin/tests/functional/op_reference/erf.cpp b/docs/template_plugin/tests/functional/op_reference/erf.cpp
new file mode 100644
index 00000000000..bd888a8e03c
--- /dev/null
+++ b/docs/template_plugin/tests/functional/op_reference/erf.cpp
@@ -0,0 +1,94 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "base_reference_test.hpp"
+
+using namespace reference_tests;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+struct ErfParams {
+    template
+    ErfParams(const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const std::vector& iValues)
+        : pshape(shape), inType(iType), outType(iType), inputData(CreateBlob(iType, iValues)) {
+        std::vector oValues;
+        std::vector output;
+        for (auto element : iValues)
+            output.push_back(static_cast(element));
+
+        std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double {
+            return std::erf(input);
+        });
+
+        if (std::is_integral()) {
+            std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double {
+                return std::round(input);
+            });
+        }
+
+        for (auto element : output)
+            oValues.push_back(static_cast(element));
+        refData = CreateBlob(outType, oValues);
+    }
+    ngraph::PartialShape pshape;
+    ngraph::element::Type inType;
+    ngraph::element::Type outType;
+    InferenceEngine::Blob::Ptr inputData;
+    InferenceEngine::Blob::Ptr refData;
+};
+
+class ReferenceErfLayerTest : public testing::TestWithParam, public CommonReferenceTest {
+public:
+    void SetUp() override {
+        auto params = GetParam();
+        function = CreateFunction(params.pshape, params.inType, params.outType);
+        inputData = {params.inputData};
+        refOutData = {params.refData};
+    }
+    static std::string getTestCaseName(const testing::TestParamInfo& obj) {
+        auto param = obj.param;
+        std::ostringstream result;
+        result << "shape=" << param.pshape << "_";
+        result << "iType=" << param.inType << "_";
+        result << "oType=" << param.outType;
+        return result.str();
+    }
+
+private:
+    static std::shared_ptr CreateFunction(const PartialShape& input_shape, const element::Type& input_type,
+                                          const element::Type& expected_output_type) {
+        const auto in = std::make_shared(input_type, input_shape);
+        const auto erf = std::make_shared(in);
+        return std::make_shared(NodeVector {erf}, ParameterVector {in});
+    }
+};
+
+TEST_P(ReferenceErfLayerTest, CompareWithRefs) {
+    Exec();
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    smoke_Erf_With_Hardcoded_Refs, ReferenceErfLayerTest,
+    ::testing::Values(ErfParams(ngraph::PartialShape {2, 5}, ngraph::element::f32,
+                                std::vector {-INFINITY, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, INFINITY}),
+                      ErfParams(ngraph::PartialShape {2, 5}, ngraph::element::f16,
+                                std::vector {-INFINITY, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, INFINITY}),
+                      ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::i32,
+                                std::vector {std::numeric_limits::min(), -2, -1, 1, 2, std::numeric_limits::max()}),
+                      ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::u32,
+                                std::vector {std::numeric_limits::min(), 0, 1, 2, 3, std::numeric_limits::max()}),
+                      ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::i64,
+                                std::vector {std::numeric_limits::min(), -2, -1, 1, 2, std::numeric_limits::max()}),
+                      ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::u64,
+                                std::vector {std::numeric_limits::min(), 0, 1, 2, 3, std::numeric_limits::max()})),
+    ReferenceErfLayerTest::getTestCaseName);
diff --git a/docs/template_plugin/tests/functional/op_reference/less.cpp b/docs/template_plugin/tests/functional/op_reference/less.cpp
new file mode 100644
index 00000000000..5d01cdfab64
--- /dev/null
+++ b/docs/template_plugin/tests/functional/op_reference/less.cpp
@@ -0,0 +1,82 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include "comparison.hpp"
+
+using namespace ngraph;
+using namespace InferenceEngine;
+using ComparisonTypes = ngraph::helpers::ComparisonTypes;
+
+namespace reference_tests {
+namespace ComparisonOpsRefTestDefinitions {
+namespace {
+TEST_P(ReferenceComparisonLayerTest, LessCompareWithHardcodedRefs) {
+    Exec();
+}
+
+template
+std::vector generateComparisonParams(const element::Type& type) {
+    using T = typename element_type_traits::value_type;
+    std::vector compParams {
+        // 1D // 2D // 3D // 4D
+        Builder {}
+            .compType(ComparisonTypes::LESS)
+            .input1({{2, 2}, type, std::vector {0, 12, 23, 0}})
+            .input2({{2, 2}, type, std::vector {0, 12, 23, 0}})
+            .expected({{2, 2}, element::boolean, std::vector {0, 0, 0, 0}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS)
+            .input1({{2, 3}, type, std::vector {0, 6, 45, 1, 21, 21}})
+            .input2({{2, 3}, type, std::vector {1, 18, 23, 1, 19, 21}})
+            .expected({{2, 3}, element::boolean, std::vector {1, 1, 0, 0, 0, 0}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS)
+            .input1({{1}, type, std::vector {53}})
+            .input2({{1}, type, std::vector {53}})
+            .expected({{1}, element::boolean, std::vector {0}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS)
+            .input1({{2, 4}, type, std::vector {0, 12, 23, 0, 1, 5, 11, 8}})
+            .input2({{2, 4}, type, std::vector {0, 12, 23, 0, 10, 5, 11, 8}})
+            .expected({{2, 4}, element::boolean, std::vector {0, 0, 0, 0, 1, 0, 0, 0}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS)
+            .input1({{3, 1, 2}, type, std::vector {2, 1, 4, 1, 3, 1}})
+            .input2({{1, 2, 1}, type, std::vector {1, 1}})
+            .expected({{3, 2, 2}, element::boolean, std::vector {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS)
+            .input1({{2, 1, 2, 1}, type, std::vector {2, 1, 4, 1}})
+            .input2({{1, 2, 1}, type, std::vector {1, 1}})
+            .expected({{2, 1, 2, 1}, element::boolean, std::vector {0, 0, 0, 0}})};
+    return compParams;
+}
+
+std::vector generateComparisonCombinedParams() {
+    const std::vector> compTypeParams {
+        generateComparisonParams(element::f32),
+        generateComparisonParams(element::f16),
+        generateComparisonParams(element::i32),
+        generateComparisonParams(element::u32),
+        generateComparisonParams(element::boolean)};
+    std::vector combinedParams;
+
+    for (const auto& params : compTypeParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+} // namespace
+INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
+                         ReferenceComparisonLayerTest::getTestCaseName);
+} // namespace ComparisonOpsRefTestDefinitions
+} // namespace reference_tests
\ No newline at end of file
diff --git a/docs/template_plugin/tests/functional/op_reference/less_eq.cpp b/docs/template_plugin/tests/functional/op_reference/less_eq.cpp
new file mode 100644
index 00000000000..f530867f847
--- /dev/null
+++ b/docs/template_plugin/tests/functional/op_reference/less_eq.cpp
@@ -0,0 +1,82 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include "comparison.hpp"
+
+using namespace ngraph;
+using namespace InferenceEngine;
+using ComparisonTypes = ngraph::helpers::ComparisonTypes;
+
+namespace reference_tests {
+namespace ComparisonOpsRefTestDefinitions {
+namespace {
+TEST_P(ReferenceComparisonLayerTest, LessEqualCompareWithHardcodedRefs) {
+    Exec();
+}
+
+template
+std::vector generateComparisonParams(const element::Type& type) {
+    using T = typename element_type_traits::value_type;
+    std::vector compParams {
+        // 1D // 2D // 3D // 4D
+        Builder {}
+            .compType(ComparisonTypes::LESS_EQUAL)
+            .input1({{2, 2}, type, std::vector {0, 12, 23, 0}})
+            .input2({{2, 2}, type, std::vector {0, 12, 23, 0}})
+            .expected({{2, 2}, element::boolean, std::vector {1, 1, 1, 1}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS_EQUAL)
+            .input1({{2, 3}, type, std::vector {0, 6, 45, 1, 21, 21}})
+            .input2({{2, 3}, type, std::vector {1, 18, 23, 1, 19, 21}})
+            .expected({{2, 3}, element::boolean, std::vector {1, 1, 0, 1, 0, 1}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS_EQUAL)
+            .input1({{1}, type, std::vector {53}})
+            .input2({{1}, type, std::vector {53}})
+            .expected({{1}, element::boolean, std::vector {1}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS_EQUAL)
+            .input1({{2, 4}, type, std::vector {0, 12, 23, 0, 1, 5, 11, 8}})
+            .input2({{2, 4}, type, std::vector {0, 12, 23, 0, 10, 5, 11, 8}})
+            .expected({{2, 4}, element::boolean, std::vector {1, 1, 1, 1, 1, 1, 1, 1}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS_EQUAL)
+            .input1({{3, 1, 2}, type, std::vector {2, 1, 4, 1, 3, 1}})
+            .input2({{1, 2, 1}, type, std::vector {1, 1}})
+            .expected({{3, 2, 2}, element::boolean, std::vector {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}}),
+        Builder {}
+            .compType(ComparisonTypes::LESS_EQUAL)
+            .input1({{2, 1, 2, 1}, type, std::vector {2, 1, 4, 1}})
+            .input2({{1, 2, 1}, type, std::vector {1, 1}})
+            .expected({{2, 1, 2, 1}, element::boolean, std::vector {0, 1, 0, 1}})};
+    return compParams;
+}
+
+std::vector generateComparisonCombinedParams() {
+    const std::vector> compTypeParams {
+        generateComparisonParams(element::f32),
+        generateComparisonParams(element::f16),
+        generateComparisonParams(element::i32),
+        generateComparisonParams(element::u32),
+        generateComparisonParams(element::boolean)};
+    std::vector combinedParams;
+
+    for (const auto& params : compTypeParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+} // namespace
+INSTANTIATE_TEST_SUITE_P(smoke_Comparison_With_Hardcoded_Refs, ReferenceComparisonLayerTest, ::testing::ValuesIn(generateComparisonCombinedParams()),
+                         ReferenceComparisonLayerTest::getTestCaseName);
+} // namespace ComparisonOpsRefTestDefinitions
+} // namespace reference_tests
\ No newline at end of file
diff --git a/docs/template_plugin/tests/functional/op_reference/logical_and.cpp b/docs/template_plugin/tests/functional/op_reference/logical_and.cpp
new file mode 100644
index
00000000000..0313874533e --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/logical_and.cpp @@ -0,0 +1,83 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ngraph; +using namespace InferenceEngine; + + +struct LogicalAndParams { + template + LogicalAndParams(const ngraph::PartialShape& input_shape1, const ngraph::PartialShape& input_shape2 , + const std::vector& iValues1, const std::vector& iValues2, const std::vector& oValues) + : pshape1(input_shape1), pshape2(input_shape2), inType(ngraph::element::boolean), outType(ngraph::element::boolean), + inputData1(CreateBlob(ngraph::element::boolean, iValues1)), inputData2(CreateBlob(ngraph::element::boolean, iValues2)), + refData(CreateBlob(ngraph::element::boolean, oValues)) {} + ngraph::PartialShape pshape1; + ngraph::PartialShape pshape2; + ngraph::element::Type inType; + ngraph::element::Type outType; + InferenceEngine::Blob::Ptr inputData1; + InferenceEngine::Blob::Ptr inputData2; + InferenceEngine::Blob::Ptr refData; +}; + +class ReferenceLogicalAndLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.pshape1, params.pshape2, params.inType); + inputData = {params.inputData1, params.inputData2}; + refOutData = {params.refData}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "input_shape1=" << param.pshape1 << "_"; + result << "input_shape2=" << param.pshape2 << "_"; + result << "iType=" << param.inType << "_"; + result << "oType=" << param.outType; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const PartialShape& input_shape1, + const PartialShape& input_shape2, const element::Type& input_type) { + const auto in = std::make_shared(input_type, input_shape1); + const auto in2 = std::make_shared(input_type, input_shape2); + const auto logical_and = std::make_shared(in, in2); + return std::make_shared(NodeVector {logical_and}, ParameterVector {in, in2}); + } +}; + +TEST_P(ReferenceLogicalAndLayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_LogicalAnd_With_Hardcoded_Refs, ReferenceLogicalAndLayerTest, + ::testing::Values( + LogicalAndParams(ngraph::PartialShape {2, 2}, ngraph::PartialShape {2, 2}, + std::vector {true, false, true, false}, + std::vector {false, true, true, false}, + std::vector {false, false, true, false}), + LogicalAndParams(ngraph::PartialShape {2, 1, 2, 1}, ngraph::PartialShape {1, 1, 2, 1}, + std::vector {true, false, true, false}, + std::vector {true, false}, + std::vector {true, false, true, false}), + LogicalAndParams(ngraph::PartialShape {3, 4}, ngraph::PartialShape {3, 4}, + std::vector {true, true, true, true, true, false, true, false, false, true, true, true}, + std::vector {true, true, true, true, true, false, true, false, false, true, true, false}, + std::vector {true, true, true, true, true, false, true, false, false, true, true, false})), + ReferenceLogicalAndLayerTest::getTestCaseName); diff --git a/docs/template_plugin/tests/functional/op_reference/roi_pooling.cpp b/docs/template_plugin/tests/functional/op_reference/roi_pooling.cpp new file mode 100644 index 00000000000..9baedeb3404 --- /dev/null +++ 
b/docs/template_plugin/tests/functional/op_reference/roi_pooling.cpp @@ -0,0 +1,226 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace reference_tests; + +struct ROIPoolingParams { + template + ROIPoolingParams(const size_t iH, const size_t iW, const size_t ch, const size_t rois, + const size_t oH, const size_t oW, const float sS, const std::string mode, + const ngraph::element::Type& type, const std::vector& inputValues, + const std::vector& proposalValues, const std::vector& outputValues) + : inputH(iH), inputW(iW), channelCount(ch), roiCount(rois), outputH(oH), outputW(oW), spatialScale(sS), + poolingMode(mode), dataType(type), featureMap(CreateBlob(type, inputValues)), + proposal(CreateBlob(type, proposalValues)), refData(CreateBlob(type, outputValues)) {} + size_t inputH; + size_t inputW; + size_t channelCount; + size_t roiCount; + size_t outputH; + size_t outputW; + float spatialScale; + std::string poolingMode; + ngraph::element::Type dataType; + InferenceEngine::Blob::Ptr featureMap; + InferenceEngine::Blob::Ptr proposal; + InferenceEngine::Blob::Ptr refData; + +public: + template + inline static std::vector increasinglyFilledBlob(size_t size) { + std::vector inputValues; + T one = 1; + for (size_t i = 0; i < size; i++) { + inputValues.push_back(one * i / 10); + } + return inputValues; + } + template + inline static std::vector equallyFilledBlob(size_t size, T value) { + std::vector inputValues; + for (size_t i = 0; i < size; i++) { + inputValues.push_back(value); + } + return inputValues; + } +}; + +class ReferenceRoiPoolingLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.inputH, params.inputW, params.channelCount, params.roiCount, + params.outputH, params.outputW, params.spatialScale, params.poolingMode, params.dataType); + inputData = {params.featureMap, params.proposal}; + refOutData = {params.refData}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "IS=" << param.inputH << "," << param.inputW << "_"; + result << "OS=" << param.outputH << "," << param.outputW << "_"; + result << "Ch=" << param.channelCount << "_"; + result << "Rois=" << param.roiCount << "_"; + result << "Ss=" << param.spatialScale << "_"; + result << "Mode=" << param.poolingMode << "_"; + result << "Prec=" << param.dataType << "_"; + result << std::to_string(obj.index); + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const size_t i_h, const size_t i_w, const size_t ch, const size_t roi_count, + const size_t o_h, const size_t o_w, const float spat_scale, const std::string mode, + const ngraph::element::Type& type) { + Shape feat_map_shape{1, ch, i_h, i_w}; + Shape rois_shape{roi_count, 5}; + Shape pooled_shape{o_h, o_w}; + Shape output_shape{roi_count, ch, o_h, o_w}; + + const auto feat_map = std::make_shared(type, feat_map_shape); + const auto rois = std::make_shared(type, rois_shape); + const auto roi_pooling = std::make_shared(feat_map, rois, pooled_shape, spat_scale, mode); + return std::make_shared(roi_pooling, ParameterVector{feat_map, rois}); + } +}; + +TEST_P(ReferenceRoiPoolingLayerTest, CompareWithHardcodedRefs) { + Exec(); +} + 
+INSTANTIATE_TEST_SUITE_P( + smoke_ROIPooling_With_Hardcoded_Refs, ReferenceRoiPoolingLayerTest, + ::testing::Values( + // fp32 + // roi_pooling_1x1_max + ROIPoolingParams(6, 6, // iH, iW + 3, 3, // channels, rois + 1, 1, // oH, oW + 1.f, "max", // scale, mode + element::f32, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 1, 1, 2, 3, 0, 1, 1, 2, 3, 0, 1, 1, 2, 3}, + std::vector {2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f}), + // roi_pooling_2x2_max + ROIPoolingParams(6, 6, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "max", // scale, mode + element::f32, ROIPoolingParams::increasinglyFilledBlob(1 * 6 * 6), + std::vector {0, 1, 1, 3, 3, 0, 1, 2, 2, 4, 0, 0, 1, 4, 5}, + std::vector {1.4f, 1.5f, 2.0f, 2.1f, 1.9f, 2.0f, 2.5f, 2.6f, 2.0f, 2.2f, 3.2f, 3.4f}), + // roi_pooling_1x1_bilinear + ROIPoolingParams(6, 6, // iH, iW + 3, 2, // channels, rois + 1, 1, // oH, oW + 1.f, "bilinear", // scale, mode + element::f32, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 0.2, 0.2, 0.4, 0.4, 0, 0.2, 0.2, 0.6, 0.6}, + std::vector {1.05f, 4.65f, 8.25f, 1.4f, 5.0f, 8.6f}), + // roi_pooling_2x2_bilinear + ROIPoolingParams(8, 8, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "bilinear", // scale, mode + element::f32, ROIPoolingParams::increasinglyFilledBlob(1 * 8 * 8), + std::vector {0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f}, + std::vector {1.225f, 1.645f, 4.585f, 5.005f, + 1.225f, 1.645f, 4.585f, 5.005f, + 1.225f, 1.645f, 4.585f, 5.005f}), + // roi_pooling_2x2_bilinear_border_proposal + ROIPoolingParams(50, 50, // iH, iW + 1, 1, // channels, rois + 4, 4, // oH, oW + 1.f, "bilinear", // scale, mode + element::f32, ROIPoolingParams::equallyFilledBlob(1 * 50 * 50, 1), + std::vector {0.f, 0.f, 0.248046786f, 0.471333951f, 1.f}, + std::vector(16, 1.f)), + + // bf16 + // roi_pooling_1x1_max + ROIPoolingParams(6, 6, // iH, iW + 3, 3, // channels, rois + 1, 1, // oH, oW + 1.f, "max", // scale, mode + element::bf16, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 1, 1, 2, 3, 0, 1, 1, 2, 3, 0, 1, 1, 2, 3}, + std::vector {2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f}), + // roi_pooling_2x2_max + ROIPoolingParams(6, 6, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "max", // scale, mode + element::bf16, ROIPoolingParams::increasinglyFilledBlob(1 * 6 * 6), + std::vector {0, 1, 1, 3, 3, 0, 1, 2, 2, 4, 0, 0, 1, 4, 5}, + std::vector {1.4f, 1.5f, 2.0f, 2.1f, 1.9f, 2.0f, 2.5f, 2.6f, 2.0f, 2.2f, 3.2f, 3.4f}), + // roi_pooling_1x1_bilinear + ROIPoolingParams(6, 6, // iH, iW + 3, 2, // channels, rois + 1, 1, // oH, oW + 1.f, "bilinear", // scale, mode + element::bf16, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 0.2, 0.2, 0.4, 0.4, 0, 0.2, 0.2, 0.6, 0.6}, + std::vector {1.05f, 4.65f, 8.25f, 1.4f, 5.0f, 8.6f}), + // roi_pooling_2x2_bilinear + ROIPoolingParams(8, 8, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "bilinear", // scale, mode + element::bf16, ROIPoolingParams::increasinglyFilledBlob(1 * 8 * 8), + std::vector {0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f}, + std::vector {1.225f, 1.645f, 4.585f, 4.937f, + 1.225f, 1.645f, 4.585f, 4.937f, + 1.225f, 1.645f, 4.585f, 4.937f}), + // fp16 + // roi_pooling_1x1_max + ROIPoolingParams(6, 6, // iH, iW + 3, 3, // channels, rois + 1, 1, // oH, oW + 1.f, "max", // scale, mode + element::f16, 
ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 1, 1, 2, 3, 0, 1, 1, 2, 3, 0, 1, 1, 2, 3}, + std::vector {2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f, 2.0f, 5.6f, 9.2f}), + // roi_pooling_2x2_max + ROIPoolingParams(6, 6, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "max", // scale, mode + element::f16, ROIPoolingParams::increasinglyFilledBlob(1 * 6 * 6), + std::vector {0, 1, 1, 3, 3, 0, 1, 2, 2, 4, 0, 0, 1, 4, 5}, + std::vector {1.4f, 1.5f, 2.0f, 2.1f, 1.9f, 2.0f, 2.5f, 2.6f, 2.0f, 2.2f, 3.2f, 3.4f}), + // roi_pooling_1x1_bilinear + ROIPoolingParams(6, 6, // iH, iW + 3, 2, // channels, rois + 1, 1, // oH, oW + 1.f, "bilinear", // scale, mode + element::f16, ROIPoolingParams::increasinglyFilledBlob(3 * 6 * 6), + std::vector {0, 0.2, 0.2, 0.4, 0.4, 0, 0.2, 0.2, 0.6, 0.6}, + std::vector {1.05f, 4.65f, 8.25f, 1.4f, 5.0f, 8.6f}), + // roi_pooling_2x2_bilinear + ROIPoolingParams(8, 8, // iH, iW + 1, 3, // channels, rois + 2, 2, // oH, oW + 1.f, "bilinear", // scale, mode + element::f16, ROIPoolingParams::increasinglyFilledBlob(1 * 8 * 8), + std::vector {0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f, + 0.f, 0.15f, 0.2f, 0.75f, 0.8f}, + std::vector {1.225f, 1.645f, 4.585f, 5.005f, + 1.225f, 1.645f, 4.585f, 5.005f, + 1.225f, 1.645f, 4.585f, 5.005f})), + ReferenceRoiPoolingLayerTest::getTestCaseName); diff --git a/docs/template_plugin/tests/functional/op_reference/tan.cpp b/docs/template_plugin/tests/functional/op_reference/tan.cpp new file mode 100644 index 00000000000..5be7a7ad03c --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/tan.cpp @@ -0,0 +1,85 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace reference_tests; + +namespace { +struct TanParams { + template + TanParams(const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const std::vector& iValues, + const std::vector& oValues) + :pshape(shape), inType(iType), outType(iType), inputData(CreateBlob(iType, iValues)), refData(CreateBlob(iType, oValues)) {} + ngraph::PartialShape pshape; + ngraph::element::Type inType; + ngraph::element::Type outType; + InferenceEngine::Blob::Ptr inputData; + InferenceEngine::Blob::Ptr refData; +}; + +class ReferenceTanLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.pshape, params.inType); + inputData = {params.inputData}; + refOutData = {params.refData}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "shape=" << param.pshape << "_"; + result << "iType=" << param.inType << "_"; + result << "oType=" << param.outType; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const PartialShape& input_shape, const element::Type& input_type) { + const auto in = std::make_shared(input_type, input_shape); + const auto tan = std::make_shared(in); + return std::make_shared(tan, ParameterVector {in}); + } +}; + +TEST_P(ReferenceTanLayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +std::vector generateTanCombinedParams() { + std::vector combinedParams { + TanParams(ngraph::PartialShape {5}, ngraph::element::i32, std::vector {-2, -1, 0, 1, 2}, + std::vector {2, -2, 0, 2, -2}), + 
TanParams(ngraph::PartialShape {5}, ngraph::element::i64, std::vector<int64_t> {-2, -1, 0, 1, 2}, + std::vector<int64_t> {2, -2, 0, 2, -2}), + TanParams(ngraph::PartialShape {5}, ngraph::element::u32, std::vector<uint32_t> {1, 2, 3, 4, 5}, + std::vector<uint32_t> {2, 0xFFFFFFFF - 1, 0, 1, 0xFFFFFFFF - 2}), + TanParams(ngraph::PartialShape {5}, ngraph::element::u64, std::vector<uint64_t> {1, 2, 3, 4, 5}, + std::vector<uint64_t> {2, 0xFFFFFFFFFFFFFFFF - 1, 0, 1, 0xFFFFFFFFFFFFFFFF - 2}), + TanParams(ngraph::PartialShape {11}, ngraph::element::f32, std::vector<float> {0.f, 0.25f, + -0.25f, 0.5f, -0.5f, 1.f, -1.f, 2.f, -2.f, 4.f, -4.f}, + std::vector<float> {0.00000000f, 0.25534192f, -0.25534192f, 0.54630249f, -0.54630249f, + 1.55740772f, -1.55740772f, -2.18503986f, 2.18503986f, 1.15782128f, -1.15782128f}), + TanParams(ngraph::PartialShape {11}, ngraph::element::f16, std::vector<ngraph::float16> {0.f, 0.25f, + -0.25f, 0.5f, -0.5f, 1.f, -1.f, 2.f, -2.f, 4.f, -4.f}, + std::vector<ngraph::float16> {0.00000000f, 0.25534192f, -0.25534192f, 0.54630249f, -0.54630249f, + 1.55740772f, -1.55740772f, -2.18503986f, 2.18503986f, 1.15782128f, -1.15782128f}) + }; + return combinedParams; +} + +INSTANTIATE_TEST_SUITE_P(smoke_TAN_With_Hardcoded_Refs, ReferenceTanLayerTest, ::testing::ValuesIn(generateTanCombinedParams()), + ReferenceTanLayerTest::getTestCaseName); +} // namespace diff --git a/inference-engine/cmake/ie_parallel.cmake b/inference-engine/cmake/ie_parallel.cmake index d33a73a5fa7..eb844d25b76 100644 --- a/inference-engine/cmake/ie_parallel.cmake +++ b/inference-engine/cmake/ie_parallel.cmake @@ -29,6 +29,7 @@ function(set_ie_threading_interface_for TARGET_NAME) set(TBB_IMPORTED_TARGETS ${TBB_IMPORTED_TARGETS} PARENT_SCOPE) set(TBB_VERSION ${TBB_VERSION} PARENT_SCOPE) if (NOT TBB_FOUND) + set(THREADING "SEQ" PARENT_SCOPE) ext_message(WARNING "TBB was not found by the configured TBB_DIR/TBBROOT path.\ SEQ method will be used.") endif () @@ -95,6 +96,7 @@ function(set_ie_threading_interface_for TARGET_NAME) set(IE_THREAD_DEFINE "IE_THREAD_TBB") ie_target_link_libraries(${TARGET_NAME} ${LINK_TYPE} ${TBB_IMPORTED_TARGETS}) else () + set(THREADING "SEQ" PARENT_SCOPE) ext_message(WARNING "TBB was not found by the configured TBB_DIR path.\ SEQ method will be used for ${TARGET_NAME}") endif () @@ -133,6 +135,7 @@ function(set_ie_threading_interface_for TARGET_NAME) if (NOT OMP_LIBRARIES_RELEASE) ext_message(WARNING "Intel OpenMP not found. Intel OpenMP support will be disabled. 
${IE_THREAD_DEFINE} is defined") + set(THREADING "SEQ" PARENT_SCOPE) else () set(IE_THREAD_DEFINE "IE_THREAD_OMP") diff --git a/inference-engine/ie_bridges/c/src/CMakeLists.txt b/inference-engine/ie_bridges/c/src/CMakeLists.txt index 69760a52de9..a0e1b3469c9 100644 --- a/inference-engine/ie_bridges/c/src/CMakeLists.txt +++ b/inference-engine/ie_bridges/c/src/CMakeLists.txt @@ -14,7 +14,7 @@ add_library(${TARGET_NAME} SHARED ${HEADERS} ${SOURCES}) target_link_libraries(${TARGET_NAME} PRIVATE inference_engine) target_include_directories(${TARGET_NAME} PUBLIC - $ + $ $) add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) @@ -40,5 +40,5 @@ install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core_c) install(DIRECTORY ${InferenceEngine_C_API_SOURCE_DIR}/include/ - DESTINATION ${IE_CPACK_IE_DIR}/include + DESTINATION ${IE_CPACK_IE_DIR}/include/ie COMPONENT core_c_dev) diff --git a/inference-engine/ie_bridges/python/CMakeLists.txt b/inference-engine/ie_bridges/python/CMakeLists.txt index 15d248379d7..a88b1017a12 100644 --- a/inference-engine/ie_bridges/python/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/CMakeLists.txt @@ -58,6 +58,13 @@ else() endif() endif() +function(ov_python_disable_intel_warnings target) + if(UNIX AND CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + # 1292: unknown attribute "fallthrough" + target_compile_options(${target} PRIVATE -diag-disable=1292) + endif() +endfunction() + set (PYTHON_BRIDGE_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) add_subdirectory (src/openvino/inference_engine) add_subdirectory (src/openvino/offline_transformations) diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt index 059f335f5df..cfab4f2d907 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt @@ -20,13 +20,15 @@ set_source_files_properties(${PYX_SOURCES} PROPERTIES CYTHON_IS_CXX ON) # create target cython_add_module(${TARGET_NAME} ${SOURCES}) -set(INSTALLED_TARGETS ${TARGET_NAME}) +ov_python_disable_intel_warnings(${TARGET_NAME}) +set(INSTALLED_TARGETS ${TARGET_NAME}) list(REMOVE_ITEM PYX_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/ie_api.pyx") foreach(PYX_FILE IN LISTS PYX_SOURCES) get_filename_component(PYX_NAME "${PYX_FILE}" NAME_WE) cython_add_module(${PYX_NAME} ${PYX_FILE}) + ov_python_disable_intel_warnings(${PYX_NAME}) add_dependencies(${TARGET_NAME} ${PYX_NAME}) target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES}) diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx index 66269fba630..5f7a0a02bcb 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx @@ -284,7 +284,9 @@ cdef class IECore: # If the parameter is not specified, the default configuration is handled automatically. 
# @return Instance of IECore class def __cinit__(self, xml_config_file: str = ""): - self.impl = C.IECore(xml_config_file.encode()) + cdef string c_xml_config_file = xml_config_file.encode() + with nogil: + self.impl = C.IECore(c_xml_config_file) ## Get a `namedtuple` object with versions of the plugin specified # @param device_name: Name of the the registered plugin @@ -326,12 +328,15 @@ cdef class IECore: cdef string weights_ cdef string model_ cdef IENetwork net = IENetwork() + cdef size_t bin_size if init_from_buffer: model_ = bytes(model) - net.impl = self.impl.readNetwork(model_, weights, len(weights)) + bin_buffer = weights + bin_size = len(weights) + with nogil: + net.impl = self.impl.readNetwork(model_, bin_buffer, bin_size) else: weights_ = "".encode() - model = os.fspath(model) if not os.path.isfile(model): raise Exception(f"Path to the model {model} doesn't exist or it's a directory") @@ -342,8 +347,8 @@ cdef class IECore: if not os.path.isfile(weights): raise Exception(f"Path to the weights {weights} doesn't exist or it's a directory") weights_ = weights.encode() - - net.impl = self.impl.readNetwork(model_, weights_) + with nogil: + net.impl = self.impl.readNetwork(model_, weights_) return net ## Loads a network that was read from the Intermediate Representation (IR) to the plugin with specified device name @@ -367,16 +372,22 @@ cdef class IECore: cpdef ExecutableNetwork load_network(self, network: [IENetwork, str], str device_name, config=None, int num_requests=1): cdef ExecutableNetwork exec_net = ExecutableNetwork() cdef map[string, string] c_config + cdef string c_device_name + cdef string c_network_path if num_requests < 0: raise ValueError(f"Incorrect number of requests specified: {num_requests}. Expected positive integer number " "or zero for auto detection") if config: c_config = dict_to_c_map(config) exec_net.ie_core_impl = self.impl + c_device_name = device_name.encode() if isinstance(network, str): - exec_net.impl = move(self.impl.loadNetworkFromFile((network).encode(), device_name.encode(), c_config, num_requests)) + c_network_path = network.encode() + with nogil: + exec_net.impl = move(self.impl.loadNetworkFromFile(c_network_path, c_device_name, c_config, num_requests)) else: - exec_net.impl = move(self.impl.loadNetwork((network).impl, device_name.encode(), c_config, num_requests)) + with nogil: + exec_net.impl = move(self.impl.loadNetwork((network).impl, c_device_name, c_config, num_requests)) return exec_net ## Creates an executable network from a previously exported network @@ -534,7 +545,9 @@ cdef class IECore: # If there are more than one device of a specific type, they all are listed followed by a dot and a number. @property def available_devices(self): - cdef vector[string] c_devices = self.impl.getAvailableDevices() + cdef vector[string] c_devices + with nogil: + c_devices = self.impl.getAvailableDevices() return [d.decode() for d in c_devices] ## This structure stores info about pre-processing of network inputs (scale, mean image, ...) 
@@ -897,15 +910,19 @@ cdef class ExecutableNetwork: ## A tuple of `InferRequest` instances @property def requests(self): + cdef size_t c_infer_requests_size + with nogil: + c_infer_requests_size = deref(self.impl).infer_requests.size() if len(self._infer_requests) == 0: - for i in range(deref(self.impl).infer_requests.size()): + for i in range(c_infer_requests_size): infer_request = InferRequest() - infer_request.impl = &(deref(self.impl).infer_requests[i]) + with nogil: + infer_request.impl = &(deref(self.impl).infer_requests[i]) infer_request._inputs_list = list(self.input_info.keys()) infer_request._outputs_list = list(self.outputs.keys()) self._infer_requests.append(infer_request) - if len(self._infer_requests) != deref(self.impl).infer_requests.size(): + if len(self._infer_requests) != c_infer_requests_size: raise Exception("Mismatch of infer requests number!") return self._infer_requests @@ -923,26 +940,6 @@ cdef class ExecutableNetwork: inputs[in_.first.decode()] = input_info_ptr return inputs - ## \note The property is deprecated. Please use the input_info property - # to get the map of inputs - # - ## A dictionary that maps input layer names to DataPtr objects - @property - def inputs(self): - warnings.warn("'inputs' property of ExecutableNetwork class is deprecated. " - "To access DataPtrs user need to use 'input_data' property " - "of InputInfoCPtr objects which can be accessed by 'input_info' property.", - DeprecationWarning) - cdef map[string, C.DataPtr] c_inputs = deref(self.impl).getInputs() - inputs = {} - cdef DataPtr data_ptr - for in_ in c_inputs: - data_ptr = DataPtr() - data_ptr._ptr = in_.second - data_ptr._ptr_plugin = deref(self.impl).getPluginLink() - inputs[in_.first.decode()] = data_ptr - return inputs - ## A dictionary that maps output layer names to CDataPtr objects @property def outputs(self): @@ -1022,16 +1019,26 @@ cdef class ExecutableNetwork: # If not specified, `timeout` value is set to -1 by default. # @return Request status code: OK or RESULT_NOT_READY cpdef wait(self, num_requests=None, timeout=None): + cdef int status_code + cdef int64_t c_timeout + cdef int c_num_requests if num_requests is None: num_requests = len(self.requests) + c_num_requests = num_requests if timeout is None: timeout = WaitMode.RESULT_READY - return deref(self.impl).wait( num_requests, timeout) + c_timeout = timeout + with nogil: + status_code = deref(self.impl).wait(c_num_requests, c_timeout) + return status_code ## Get idle request ID # @return Request index cpdef get_idle_request_id(self): - return deref(self.impl).getIdleRequestId() + cdef int request_id + with nogil: + request_id = deref(self.impl).getIdleRequestId() + return request_id ctypedef extern void (*cb_type)(void*, int) with gil @@ -1177,8 +1184,8 @@ cdef class InferRequest: cpdef infer(self, inputs=None): if inputs is not None: self._fill_inputs(inputs) - - deref(self.impl).infer() + with nogil: + deref(self.impl).infer() ## Starts asynchronous inference of the infer request and fill outputs array # @@ -1197,7 +1204,8 @@ cdef class InferRequest: self._fill_inputs(inputs) if self._py_callback_used: self._py_callback_called.clear() - deref(self.impl).infer_async() + with nogil: + deref(self.impl).infer_async() ## Waits for the result to become available. Blocks until specified timeout elapses or the result # becomes available, whichever comes first. @@ -1213,9 +1221,14 @@ cdef class InferRequest: # # Usage example: See `async_infer()` method of the the `InferRequest` class. 
cpdef wait(self, timeout=None): + cdef int status + cdef int64_t c_timeout + cdef int c_wait_mode if self._py_callback_used: # check request status to avoid blocking for idle requests - status = deref(self.impl).wait(WaitMode.STATUS_ONLY) + c_wait_mode = WaitMode.STATUS_ONLY + with nogil: + status = deref(self.impl).wait(c_wait_mode) if status != StatusCode.RESULT_NOT_READY: return status if not self._py_callback_called.is_set(): @@ -1230,8 +1243,10 @@ cdef class InferRequest: if timeout is None: timeout = WaitMode.RESULT_READY - - return deref(self.impl).wait( timeout) + c_timeout = timeout + with nogil: + status = deref(self.impl).wait(c_timeout) + return status ## Queries performance measures per layer to get feedback of what is the most time consuming layer. # @@ -1268,27 +1283,6 @@ cdef class InferRequest: "cpu_time": info.cpu_time, "execution_index": info.execution_index} return profile - ## A dictionary that maps input layer names to `numpy.ndarray` - # objects of proper shape with input data for the layer - @property - def inputs(self): - warnings.warn("'inputs' property of InferRequest is deprecated. Please instead use 'input_blobs' property.", - DeprecationWarning) - inputs = {} - for input in self._inputs_list: - inputs[input] = self._get_blob_buffer(input.encode()).to_numpy() - return inputs - - ## A dictionary that maps output layer names to `numpy.ndarray` objects with output data of the layer - @property - def outputs(self): - warnings.warn("'outputs' property of InferRequest is deprecated. Please instead use 'output_blobs' property.", - DeprecationWarning) - outputs = {} - for output in self._outputs_list: - outputs[output] = self._get_blob_buffer(output.encode()).to_numpy() - return deepcopy(outputs) - ## Current infer request inference time in milliseconds @property def latency(self): @@ -1333,68 +1327,25 @@ cdef class InferRequest: cdef class IENetwork: ## Class constructor # - # \note Reading networks using IENetwork constructor is deprecated. - # Please, use IECore.read_network() method instead. + # @param model: A PyCapsule containing smart pointer to nGraph function. # - # @param model: A `.xml` file of the IR or PyCapsule containing smart pointer to nGraph function. - # In case of passing a `.xml` file attribute value can be a string path or bytes with file content - # depending on `init_from_buffer` attribute value - # . - # @param weights: A `.bin` file of the IR. Depending on `init_from_buffer` value, can be a string path or - # bytes with file content. - # @param init_from_buffer: Defines the way of how `model` and `weights` attributes are interpreted. - # If `False`, attributes are interpreted as strings with paths to .xml and .bin files - # of IR. If `True`, they are interpreted as Python `bytes` object with .xml and .bin files content. - # Ignored in case of `IENetwork` object initialization from nGraph function. 
# @return Instance of IENetwork class # # Usage example:\n # Initializing `IENetwork` object from IR files: # ```python - # net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) + # func = Function([relu], [param], 'test') + # caps = Function.to_capsule(func) + # net = IENetwork(caps) # ``` - # - # Initializing `IENetwork` object bytes with content of IR files: - # ```python - # with open(path_to_bin_file, 'rb') as f: - # bin = f.read() - # with open(path_to_xml_file, 'rb') as f: - # xml = f.read() - # net = IENetwork(model=xml, weights=bin, init_from_buffer=True) - # ``` - - def __cinit__(self, model: [str, bytes] = "", weights: [str, bytes] = "", init_from_buffer: bool = False): + def __cinit__(self, model = None): # Try to create Inference Engine network from capsule - if model.__class__.__name__ == 'PyCapsule' and weights == '' and init_from_buffer is False: - self.impl = C.IENetwork(model) - return - cdef char*xml_buffer = malloc(len(model)+1) - cdef uint8_t*bin_buffer = malloc(len(weights)) - cdef string model_ - cdef string weights_ - if init_from_buffer: - warnings.warn("Reading network using constructor is deprecated. " - "Please, use IECore.read_network() method instead", DeprecationWarning) - memcpy(xml_buffer, model, len(model)) - memcpy(bin_buffer, weights, len(weights)) - xml_buffer[len(model)] = b'\0' - self.impl = C.IENetwork() - self.impl.load_from_buffer(xml_buffer, len(model), bin_buffer, len(weights)) + if model is not None: + with nogil: + self.impl = C.IENetwork(model) else: - if model and weights: - warnings.warn("Reading network using constructor is deprecated. " - "Please, use IECore.read_network() method instead", DeprecationWarning) - if not os.path.isfile(model): - raise Exception(f"Path to the model {model} doesn't exist or it's a directory") - if not os.path.isfile(weights): - raise Exception(f"Path to the weights {weights} doesn't exist or it's a directory") - model_ = model.encode() - weights_ = weights.encode() - self.impl = C.IENetwork(model_, weights_) - else: + with nogil: self.impl = C.IENetwork() - free(bin_buffer) - free(xml_buffer) ## Name of the loaded network @property @@ -1405,7 +1356,9 @@ cdef class IENetwork: ## A dictionary that maps input layer names to InputInfoPtr objects. @property def input_info(self): - cdef map[string, C.InputInfo.Ptr] c_inputs = self.impl.getInputsInfo() + cdef map[string, C.InputInfo.Ptr] c_inputs + with nogil: + c_inputs = self.impl.getInputsInfo() inputs = {} cdef InputInfoPtr input_info_ptr for input in c_inputs: @@ -1415,30 +1368,12 @@ cdef class IENetwork: inputs[input.first.decode()] = input_info_ptr return inputs - ## \note The property is deprecated. Please use the input_info property - # to get the map of inputs - # - ## A dictionary that maps input layer names to DataPtr objects - @property - def inputs(self): - warnings.warn("'inputs' property of IENetwork class is deprecated. 
" - "To access DataPtrs user need to use 'input_data' property " - "of InputInfoPtr objects which can be accessed by 'input_info' property.", - DeprecationWarning) - cdef map[string, C.DataPtr] c_inputs = self.impl.getInputs() - inputs = {} - cdef DataPtr data_ptr - for input in c_inputs: - data_ptr = DataPtr() - data_ptr._ptr_network = &self.impl - data_ptr._ptr = input.second - inputs[input.first.decode()] = data_ptr - return inputs - ## A dictionary that maps output layer names to DataPtr objects @property def outputs(self): - cdef map[string, C.DataPtr] c_outputs = self.impl.getOutputs() + cdef map[string, C.DataPtr] c_outputs + with nogil: + c_outputs = self.impl.getOutputs() outputs = {} cdef DataPtr data_ptr for output in c_outputs: diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp index e9d4f7660ba..1a6ae4f57ed 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp @@ -200,14 +200,6 @@ InferenceEnginePython::IENetwork InferenceEnginePython::read_network(std::string return InferenceEnginePython::IENetwork(std::make_shared(net)); } -InferenceEnginePython::IENetwork::IENetwork(const std::string& model, const std::string& weights) { - InferenceEngine::Core reader; - auto net = reader.ReadNetwork(model, weights); - actual = std::make_shared(net); - name = actual->getName(); - batch_size = actual->getBatchSize(); -} - InferenceEnginePython::IENetwork::IENetwork(const std::shared_ptr& cnn_network): actual(cnn_network) { if (actual == nullptr) IE_THROW() << "IENetwork was not initialized."; @@ -228,16 +220,6 @@ InferenceEnginePython::IENetwork::IENetwork(PyObject* network) { batch_size = actual->getBatchSize(); } -void InferenceEnginePython::IENetwork::load_from_buffer(const char* xml, size_t xml_size, uint8_t* bin, size_t bin_size) { - InferenceEngine::Core reader; - InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, {bin_size}, InferenceEngine::Layout::C); - auto weights_blob = InferenceEngine::make_shared_blob(tensorDesc, bin, bin_size); - auto net = reader.ReadNetwork(std::string(xml, xml + xml_size), weights_blob); - name = net.getName(); - actual = std::make_shared(net); - batch_size = actual->getBatchSize(); -} - void InferenceEnginePython::IENetwork::serialize(const std::string& path_to_xml, const std::string& path_to_bin) { actual->serialize(path_to_xml, path_to_bin); } @@ -275,15 +257,6 @@ const std::map InferenceEnginePyth return inputs; } -const std::map InferenceEnginePython::IENetwork::getInputs() { - std::map inputs; - const InferenceEngine::InputsDataMap& inputsInfo = actual->getInputsInfo(); - for (auto& in : inputsInfo) { - inputs[in.first] = in.second->getInputData(); - } - return inputs; -} - const std::map InferenceEnginePython::IENetwork::getOutputs() { std::map outputs; const InferenceEngine::OutputsDataMap& outputsInfo = actual->getOutputsInfo(); @@ -338,15 +311,6 @@ void InferenceEnginePython::IEExecNetwork::exportNetwork(const std::string& mode actual->Export(model_file); } -std::map InferenceEnginePython::IEExecNetwork::getInputs() { - InferenceEngine::ConstInputsDataMap inputsDataMap = actual->GetInputsInfo(); - std::map pyInputs; - for (const auto& item : inputsDataMap) { - pyInputs[item.first] = item.second->getInputData(); - } - return pyInputs; -} - std::map 
InferenceEnginePython::IEExecNetwork::getInputsInfo() { InferenceEngine::ConstInputsDataMap inputsDataMap = actual->GetInputsInfo(); std::map pyInputs; diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp index cd3fa07c49a..23d27474aff 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp @@ -60,18 +60,12 @@ struct IENetwork { const std::map getInputsInfo(); - const std::map getInputs(); - const std::map getOutputs(); void reshape(const std::map>& input_shapes); void serialize(const std::string& path_to_xml, const std::string& path_to_bin); - void load_from_buffer(const char* xml, size_t xml_size, uint8_t* bin, size_t bin_size); - - IENetwork(const std::string& model, const std::string& weights); - IENetwork(const std::shared_ptr& cnn_network); IENetwork(PyObject* network); @@ -146,7 +140,6 @@ struct IEExecNetwork { void exportNetwork(const std::string& model_file); std::map getInputsInfo(); - std::map getInputs(); std::map getOutputs(); PyObject* getMetric(const std::string& metric_name); diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd index 6f7fd918089..103c8d77d53 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd @@ -14,7 +14,7 @@ cdef extern from "" namespace "InferenceEngine": ctypedef vector[size_t] SizeVector cdef cppclass CExecutableNetwork "InferenceEngine::ExecutableNetwork" - + cdef cppclass TBlob[T]: ctypedef shared_ptr[TBlob[T]] Ptr @@ -154,27 +154,24 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": cdef cppclass IEExecNetwork: vector[InferRequestWrap] infer_requests IENetwork GetExecGraphInfo() except + - map[string, DataPtr] getInputs() except + map[string, CDataPtr] getOutputs() except + map[string, InputInfo.CPtr] getInputsInfo() void exportNetwork(const string & model_file) except + object getMetric(const string & metric_name) except + object getConfig(const string & metric_name) except + - int wait(int num_requests, int64_t timeout) - int getIdleRequestId() + int wait(int num_requests, int64_t timeout) nogil + int getIdleRequestId() nogil shared_ptr[CExecutableNetwork] getPluginLink() except + cdef cppclass IENetwork: - IENetwork() except + - IENetwork(object) except + - IENetwork(const string &, const string &) except + + IENetwork() nogil except + + IENetwork(object) nogil except + string name size_t batch_size string precision map[string, vector[size_t]] inputs - const map[string, InputInfo.Ptr] getInputsInfo() except + - const map[string, DataPtr] getInputs() except + - map[string, DataPtr] getOutputs() except + + const map[string, InputInfo.Ptr] getInputsInfo() nogil except + + map[string, DataPtr] getOutputs() nogil except + void addOutput(string &, size_t) except + void setAffinity(map[string, string] & types_affinity_map, map[string, string] & layers_affinity_map) except + void setBatch(size_t size) except + @@ -182,7 +179,6 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": void setLayerParams(map[string, map[string, string]] params_map) except + void serialize(const string& path_to_xml, const 
string& path_to_bin) except + void reshape(map[string, vector[size_t]] input_shapes) except + - void load_from_buffer(const char*xml, size_t xml_size, uint8_t*bin, size_t bin_size) except + object getFunction() except + void convertToOldRepresentation() except + string getOVNameForTensor(const string &) except + @@ -195,23 +191,23 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": void setBlob(const string &blob_name, const CBlob.Ptr &blob_ptr, CPreProcessInfo& info) except + const CPreProcessInfo& getPreProcess(const string& blob_name) except + map[string, ProfileInfo] getPerformanceCounts() except + - void infer() except + - void infer_async() except + - int wait(int64_t timeout) except + + void infer() nogil except + + void infer_async() nogil except + + int wait(int64_t timeout) nogil except + void setBatch(int size) except + void setCyCallback(void (*)(void*, int), void *) except + vector[CVariableState] queryState() except + cdef cppclass IECore: - IECore() except + - IECore(const string & xml_config_file) except + + IECore() nogil except + + IECore(const string & xml_config_file) nogil except + map[string, Version] getVersions(const string & deviceName) except + - IENetwork readNetwork(const string& modelPath, const string& binPath) except + - IENetwork readNetwork(const string& modelPath,uint8_t*bin, size_t bin_size) except + + IENetwork readNetwork(const string& modelPath, const string& binPath) nogil except + + IENetwork readNetwork(const string& modelPath,uint8_t*bin, size_t bin_size) nogil except + unique_ptr[IEExecNetwork] loadNetwork(IENetwork network, const string deviceName, - const map[string, string] & config, int num_requests) except + + const map[string, string] & config, int num_requests) nogil except + unique_ptr[IEExecNetwork] loadNetworkFromFile(const string & modelPath, const string & deviceName, - const map[string, string] & config, int num_requests) except + + const map[string, string] & config, int num_requests) nogil except + unique_ptr[IEExecNetwork] importNetwork(const string & modelFIle, const string & deviceName, const map[string, string] & config, int num_requests) except + map[string, string] queryNetwork(IENetwork network, const string deviceName, @@ -221,7 +217,7 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": void unregisterPlugin(const string & deviceName) except + void registerPlugins(const string & xmlConfigFile) except + void addExtension(const string & ext_lib_path, const string & deviceName) except + - vector[string] getAvailableDevices() except + + vector[string] getAvailableDevices() nogil except + object getMetric(const string & deviceName, const string & name) except + object getConfig(const string & deviceName, const string & name) except + diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt index ba526c3761d..512b1662be5 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt @@ -20,7 +20,9 @@ set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/offline_transformations_ # create target cython_add_module(${TARGET_NAME} ${SOURCES}) + add_dependencies(${TARGET_NAME} ie_api) +ov_python_disable_intel_warnings(${TARGET_NAME}) if(COMMAND ie_add_vs_version_file) ie_add_vs_version_file(NAME ${TARGET_NAME} diff --git 
a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt index 8367f941d9f..9d3e1e0ffc0 100644 --- a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt @@ -20,7 +20,9 @@ set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/test_utils_api.pyx # create target cython_add_module(${TARGET_NAME} ${SOURCES}) + add_dependencies(${TARGET_NAME} ie_api) +ov_python_disable_intel_warnings(${TARGET_NAME}) if(COMMAND ie_add_vs_version_file) ie_add_vs_version_file(NAME ${TARGET_NAME} diff --git a/inference-engine/ie_bridges/python/tests/conftest.py b/inference-engine/ie_bridges/python/tests/conftest.py index fd327147c33..e697b58ee63 100644 --- a/inference-engine/ie_bridges/python/tests/conftest.py +++ b/inference-engine/ie_bridges/python/tests/conftest.py @@ -21,11 +21,6 @@ def model_onnx_path(): test_onnx = os.path.join(path_to_repo, "models", "test_model", 'test_model.onnx') return test_onnx -def model_prototxt_path(): - path_to_repo = os.environ["MODELS_PATH"] - test_prototxt = os.path.join(path_to_repo, "models", "test_model", 'test_model.prototxt') - return test_prototxt - def image_path(): path_to_repo = os.environ["DATA_PATH"] path_to_img = os.path.join(path_to_repo, 'validation_set', '224x224', 'dog.bmp') diff --git a/inference-engine/ie_bridges/python/tests/test_ExecutableNetwork.py b/inference-engine/ie_bridges/python/tests/test_ExecutableNetwork.py index 11e4b479dac..65811503d98 100644 --- a/inference-engine/ie_bridges/python/tests/test_ExecutableNetwork.py +++ b/inference-engine/ie_bridges/python/tests/test_ExecutableNetwork.py @@ -82,24 +82,6 @@ def test_input_info(device): del ie_core -def test_inputs_deprecated(device): - ie_core = ie.IECore() - net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) - exec_net = ie_core.load_network(net, device, num_requests=5) - with warnings.catch_warnings(record=True) as w: - assert len(exec_net.inputs) == 1 - assert "data" in exec_net.inputs - assert isinstance(exec_net.inputs['data'], ie.DataPtr) - assert len(w) == 3 - for i in range (len(w)): - assert "'inputs' property of ExecutableNetwork class is deprecated. " \ - "To access DataPtrs user need to use 'input_data' property " \ - "of InputInfoCPtr objects which " \ - "can be accessed by 'input_info' property." 
in str(w[i].message) - del exec_net - del ie_core - - def test_outputs(device): ie_core = ie.IECore() net = ie_core.read_network(model=test_net_xml, weights=test_net_bin) diff --git a/inference-engine/ie_bridges/python/tests/test_IECore.py b/inference-engine/ie_bridges/python/tests/test_IECore.py index 41d28f1c41b..ed15b12d9b9 100644 --- a/inference-engine/ie_bridges/python/tests/test_IECore.py +++ b/inference-engine/ie_bridges/python/tests/test_IECore.py @@ -5,14 +5,16 @@ import os import pytest from sys import platform from pathlib import Path +from threading import Thread +from time import sleep, time +from queue import Queue from openvino.inference_engine import IENetwork, IECore, ExecutableNetwork -from conftest import model_path, plugins_path, model_onnx_path, model_prototxt_path +from conftest import model_path, plugins_path, model_onnx_path test_net_xml, test_net_bin = model_path() test_net_onnx = model_onnx_path() -test_net_prototxt = model_prototxt_path() plugins_xml, plugins_win_xml, plugins_osx_xml = plugins_path() @@ -201,18 +203,6 @@ def test_read_network_from_onnx_as_path(): assert isinstance(net, IENetwork) -def test_read_network_from_prototxt(): - ie = IECore() - net = ie.read_network(model=test_net_prototxt) - assert isinstance(net, IENetwork) - - -def test_read_network_from_prototxt_as_path(): - ie = IECore() - net = ie.read_network(model=Path(test_net_prototxt)) - assert isinstance(net, IENetwork) - - def test_incorrect_xml(): ie = IECore() with pytest.raises(Exception) as e: @@ -253,3 +243,37 @@ def test_net_from_buffer_valid(): o_net2 = ref_net.outputs assert ii_net.keys() == ii_net2.keys() assert o_net.keys() == o_net2.keys() + + +@pytest.mark.skipif(os.environ.get("TEST_DEVICE","CPU") != "GPU", reason=f"Device dependent test") +def test_load_network_release_gil(device): + running = True + message_queue = Queue() + def detect_long_gil_holds(): + sleep_time = 0.01 + latency_alert_threshold = 0.1 + # Send a message to indicate the thread is running and ready to detect GIL locks + message_queue.put("ready to detect") + while running: + start_sleep = time() + sleep(sleep_time) + elapsed = time() - start_sleep + if elapsed > latency_alert_threshold: + # Send a message to the testing thread that a long GIL lock occurred + message_queue.put(latency_alert_threshold) + ie = IECore() + net = ie.read_network(model=test_net_xml, weights=test_net_bin) + # Wait for the GIL lock detector to be up and running + gil_hold_detection_thread = Thread(daemon=True, target=detect_long_gil_holds) + gil_hold_detection_thread.start() + # Wait to make sure the thread is started and checking for GIL holds + sleep(0.1) + assert message_queue.get(timeout=5) == "ready to detect" + # Run the function that should unlock the GIL + exec_net = ie.load_network(net, device) + # Ensure resources are closed + running = False + gil_hold_detection_thread.join(timeout=5) + # Assert there were never any long gil locks + assert message_queue.qsize() == 0, \ + f"More than 0 GIL locks occured! 
Latency: {message_queue.get()})" diff --git a/inference-engine/ie_bridges/python/tests/test_IENetwork.py b/inference-engine/ie_bridges/python/tests/test_IENetwork.py index 1c3474e6891..d808e177234 100644 --- a/inference-engine/ie_bridges/python/tests/test_IENetwork.py +++ b/inference-engine/ie_bridges/python/tests/test_IENetwork.py @@ -12,60 +12,12 @@ from conftest import model_path test_net_xml, test_net_bin = model_path() -def test_create_ie_network_deprecated(): - with warnings.catch_warnings(record=True) as w: - net = IENetwork(model=test_net_xml, weights=test_net_bin) - assert isinstance(net, IENetwork) - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - assert "Reading network using constructor is deprecated. " \ - "Please, use IECore.read_network() method instead" in str(w[0].message) - - -def test_incorrect_xml_deprecated(): - with warnings.catch_warnings(record=True) as w: - with pytest.raises(Exception) as e: - IENetwork(model="./model.xml", weights=test_net_bin) - assert "Path to the model ./model.xml doesn't exist or it's a directory" in str(e.value) - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - assert "Reading network using constructor is deprecated. " \ - "Please, use IECore.read_network() method instead" in str(w[0].message) - - -def test_incorrect_bin_deprecated(): - with warnings.catch_warnings(record=True) as w: - with pytest.raises(Exception) as e: - IENetwork(model=test_net_xml, weights="./model.bin") - assert "Path to the weights ./model.bin doesn't exist or it's a directory" in str(e.value) - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - assert "Reading network using constructor is deprecated. " \ - "Please, use IECore.read_network() method instead" in str(w[0].message) - - def test_name(): ie = IECore() net = ie.read_network(model=test_net_xml, weights=test_net_bin) assert net.name == "test_model" -def test_inputs_deprecated(): - ie = IECore() - net = ie.read_network(model=test_net_xml, weights=test_net_bin) - with warnings.catch_warnings(record=True) as w: - inp = net.inputs - assert isinstance(inp['data'], DataPtr) - assert inp['data'].layout == "NCHW" - assert inp['data'].precision == "FP32" - assert inp['data'].shape == [1, 3, 32, 32] - assert len(w) == 1 - assert "'inputs' property of IENetwork class is deprecated. " \ - "To access DataPtrs user need to use 'input_data' property " \ - "of InputInfoPtr objects which " \ - "can be accessed by 'input_info' property." in str(w[-1].message) - - def test_input_info(): ie = IECore() net = ie.read_network(model=test_net_xml, weights=test_net_bin) @@ -208,21 +160,7 @@ def test_reshape(): net.reshape({"data": (2, 3, 32, 32)}) -def test_read_net_from_buffer_deprecated(): - with warnings.catch_warnings(record=True) as w: - with open(test_net_bin, 'rb') as f: - bin = f.read() - with open(test_net_xml, 'rb') as f: - xml = f.read() - net = IENetwork(model=xml, weights=bin, init_from_buffer=True) - assert isinstance(net, IENetwork) - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - assert "Reading network using constructor is deprecated. 
" \ - "Please, use IECore.read_network() method instead" in str(w[0].message) - - -def test_net_from_buffer_valid_deprecated(): +def test_net_from_buffer_valid(): ie = IECore() with open(test_net_bin, 'rb') as f: bin = f.read() diff --git a/inference-engine/ie_bridges/python/tests/test_InferRequest.py b/inference-engine/ie_bridges/python/tests/test_InferRequest.py index af79c0ff155..44afdfa9b61 100644 --- a/inference-engine/ie_bridges/python/tests/test_InferRequest.py +++ b/inference-engine/ie_bridges/python/tests/test_InferRequest.py @@ -66,32 +66,6 @@ def test_output_blobs(device): assert executable_network.requests[0].output_blobs['fc_out'].tensor_desc == td -def test_inputs_deprecated(device): - ie_core = ie.IECore() - net = ie_core.read_network(test_net_xml, test_net_bin) - executable_network = ie_core.load_network(net, device, num_requests=2) - with warnings.catch_warnings(record=True) as w: - inputs = executable_network.requests[0].inputs - assert "'inputs' property of InferRequest is deprecated. " \ - "Please instead use 'input_blobs' property." in str(w[-1].message) - del executable_network - del ie_core - del net - - -def test_outputs_deprecated(device): - ie_core = ie.IECore() - net = ie_core.read_network(test_net_xml, test_net_bin) - executable_network = ie_core.load_network(net, device, num_requests=2) - with warnings.catch_warnings(record=True) as w: - outputs = executable_network.requests[0].outputs - assert "'outputs' property of InferRequest is deprecated. Please instead use 'output_blobs' property." in str( - w[-1].message) - del executable_network - del ie_core - del net - - def test_inputs_list(device): ie_core = ie.IECore() net = ie_core.read_network(test_net_xml, test_net_bin) @@ -552,11 +526,10 @@ def test_query_state_write_buffer(device, input_shape, data_type, mode): pytest.skip("Can't run on ARM plugin") layout = ["C", "HW", "CHW", "NCHW"] - np_data_type = {"FP32": np.float32, "FP16": np.float16, "I32": np.int32} - from openvino.inference_engine import TensorDesc, Blob + from openvino.inference_engine import TensorDesc, Blob, format_map - net = ie.IENetwork(create_function_with_memory(input_shape, np_data_type[data_type])) + net = ie.IENetwork(create_function_with_memory(input_shape, format_map[data_type])) ie_core = ie.IECore() exec_net = ie_core.load_network(network=net, device_name=device, num_requests=1) request = exec_net.requests[0] @@ -572,23 +545,23 @@ def test_query_state_write_buffer(device, input_shape, data_type, mode): if mode == "set_init_memory_state": # create initial value const_init = 5 - init_array = np.full(input_shape, const_init, dtype=np_data_type[mem_state.state.tensor_desc.precision]) + init_array = np.full(input_shape, const_init, dtype=format_map[mem_state.state.tensor_desc.precision]) tensor_desc = TensorDesc(mem_state.state.tensor_desc.precision, input_shape, layout[len(input_shape) - 1]) blob = Blob(tensor_desc, init_array) mem_state.state = blob - res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=np_data_type[data_type])}) - expected_res = np.full(input_shape, 1 + const_init, dtype=np_data_type[data_type]) + res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=format_map[data_type])}) + expected_res = np.full(input_shape, 1 + const_init, dtype=format_map[data_type]) elif mode == "reset_memory_state": # reset initial state of ReadValue to zero mem_state.reset() - res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=np_data_type[data_type])}) + res = exec_net.infer({"input_data": 
np.full(input_shape, 1, dtype=format_map[data_type])}) # always ones - expected_res = np.full(input_shape, 1, dtype=np_data_type[data_type]) + expected_res = np.full(input_shape, 1, dtype=format_map[data_type]) else: - res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=np_data_type[data_type])}) - expected_res = np.full(input_shape, i, dtype=np_data_type[data_type]) + res = exec_net.infer({"input_data": np.full(input_shape, 1, dtype=format_map[data_type])}) + expected_res = np.full(input_shape, i, dtype=format_map[data_type]) assert np.allclose(res['MemoryAdd'], expected_res, atol=1e-6), \ - "Expected values: {} \n Actual values: {} \n".format(expected_res, res) + "Expected values: {} \n Actual values: {} \n".format(expected_res, res) \ No newline at end of file diff --git a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt index 681954f2766..1b1931c08a4 100644 --- a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt @@ -56,13 +56,13 @@ endif() add_custom_command(TARGET ie_wheel PRE_BUILD - COMMAND ${CMAKE_COMMAND} -E rm -rf "${CMAKE_CURRENT_BINARY_DIR}/site-packages" + COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_BINARY_DIR}/site-packages" COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} clean bdist_wheel --dist-dir ${CMAKE_BINARY_DIR}/wheels --build=${WHEEL_BUILD} --plat-name=${WHEEL_PLATFORM} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E rm "${CMAKE_CURRENT_SOURCE_DIR}/.env" + COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_SOURCE_DIR}/.env" WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" COMMENT "Building Python wheel ${WHEEL_PACKAGE_NAME}" VERBATIM diff --git a/inference-engine/samples/CMakeLists.txt b/inference-engine/samples/CMakeLists.txt index aef11e16f47..bccc7be715b 100644 --- a/inference-engine/samples/CMakeLists.txt +++ b/inference-engine/samples/CMakeLists.txt @@ -56,35 +56,30 @@ set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${IE_MAIN_SAMPLES_DIR}/${BIN_FOLDER}) if (WIN32) set_property (DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _CRT_SECURE_NO_WARNINGS) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS -DNOMINMAX") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") #no asynchronous structured exception handling + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") # no asynchronous structured exception handling set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE") if (TREAT_WARNING_AS_ERROR) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") #treating warnings as errors + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") # treating warnings as errors endif () if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qdiag-disable:177") endif() + # disable some noisy warnings if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4275 /wd4267 /wd4819") #disable some warnings + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4275 /wd4267 /wd4819") endif() else() + # treating warnings as errors if(TREAT_WARNING_AS_ERROR) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") #treating warnings as errors + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") endif() - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall") - if (APPLE) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
-Wno-error=unused-command-line-argument") - elseif(UNIX) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wuninitialized -Winit-self") - if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wmaybe-uninitialized") - endif() + if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-disable:177") endif() endif() @@ -92,6 +87,15 @@ if(APPLE) set(CMAKE_MACOSX_RPATH ON) endif() +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm64.*|aarch64.*|AARCH64.*)") + set(AARCH64 ON) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)") + set(ARM ON) +endif() +if(ARM AND NOT CMAKE_CROSSCOMPILING) + add_compile_options(-march=armv7-a) +endif() + set(CMAKE_POLICY_DEFAULT_CMP0063 NEW) set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_CXX_VISIBILITY_PRESET hidden) @@ -104,9 +108,6 @@ if(NOT DEFINED CMAKE_CXX_STANDARD) set (CMAKE_CXX_STANDARD 11) set (CMAKE_CXX_EXTENSIONS OFF) set (CMAKE_CXX_STANDARD_REQUIRED ON) - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}") - endif() endif() #################################### @@ -135,10 +136,6 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/cnpy") add_subdirectory(thirdparty/cnpy EXCLUDE_FROM_ALL) endif() -if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") -endif() - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/common/utils") add_subdirectory(common/utils) endif() diff --git a/inference-engine/samples/benchmark_app/benchmark_app.hpp b/inference-engine/samples/benchmark_app/benchmark_app.hpp index a369c2f1055..c21222f9a96 100644 --- a/inference-engine/samples/benchmark_app/benchmark_app.hpp +++ b/inference-engine/samples/benchmark_app/benchmark_app.hpp @@ -147,6 +147,14 @@ static constexpr char iop_message[] = "Optional. Specifies precision for input a " Overwrites precision from ip and op options for " "specified layers."; +static constexpr char input_image_scale_message[] = "Optional. Scale values to be used for the input image per channel.\n" + "Values to be provided in the [R, G, B] format. Can be defined for desired input of the model.\n" + "Example: -iscale data[255,255,255],info[255,255,255]\n"; + +static constexpr char input_image_mean_message[] = "Optional. Mean values to be used for the input image per channel.\n" + "Values to be provided in the [R, G, B] format. Can be defined for desired input of the model,\n" + "Example: -imean data[255,255,255],info[255,255,255]\n"; + /// @brief Define flag for showing help message
 DEFINE_bool(h, false, help_message);
@@ -259,6 +267,12 @@ DEFINE_string(cache_dir, "", cache_dir_message);
 
 /// @brief Define flag for load network from model file by name without ReadNetwork
 DEFINE_bool(load_from_file, false, load_from_file_message);
 
+/// @brief Define flag for using input image scale
+DEFINE_string(iscale, "", input_image_scale_message);
+
+/// @brief Define flag for using input image mean
+DEFINE_string(imean, "", input_image_mean_message); + /** * @brief This function show a help message */ @@ -304,4 +318,6 @@ static void showUsage() { std::cout << " -ip " << inputs_precision_message << std::endl; std::cout << " -op " << outputs_precision_message << std::endl; std::cout << " -iop \"\" " << iop_message << std::endl; + std::cout << " -iscale " << input_image_scale_message << std::endl; + std::cout << " -imean " << input_image_mean_message << std::endl; } diff --git a/inference-engine/samples/benchmark_app/inputs_filling.cpp b/inference-engine/samples/benchmark_app/inputs_filling.cpp index ef8a045279a..eadd4eceeae 100644 --- a/inference-engine/samples/benchmark_app/inputs_filling.cpp +++ b/inference-engine/samples/benchmark_app/inputs_filling.cpp @@ -91,7 +91,9 @@ void fillBlobImage(Blob::Ptr& inputBlob, const std::vector& filePat size_t offset = imageId * numChannels * width * height + (((app_info.layout == "NCHW") || (app_info.layout == "CHW")) ? (ch * width * height + h * width + w) : (h * width * numChannels + w * numChannels + ch)); - inputBlobData[offset] = static_cast(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]); + inputBlobData[offset] = + (static_cast(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]) - static_cast(app_info.mean[ch])) / + static_cast(app_info.scale[ch]); } } } diff --git a/inference-engine/samples/benchmark_app/inputs_filling.hpp b/inference-engine/samples/benchmark_app/inputs_filling.hpp index 4410faae11e..000d613db59 100644 --- a/inference-engine/samples/benchmark_app/inputs_filling.hpp +++ b/inference-engine/samples/benchmark_app/inputs_filling.hpp @@ -12,4 +12,4 @@ #include "utils.hpp" void fillBlobs(const std::vector& inputFiles, const size_t& batchSize, benchmark_app::InputsInfo& app_inputs_info, - std::vector requests); + std::vector requests); \ No newline at end of file diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp index 8df3bc2f8e4..da2b77a0ce9 100644 --- a/inference-engine/samples/benchmark_app/main.cpp +++ b/inference-engine/samples/benchmark_app/main.cpp @@ -380,7 +380,7 @@ int main(int argc, char* argv[]) { batchSize = cnnNetwork.getBatchSize(); // Parse input shapes if specified bool reshape = false; - app_inputs_info = getInputsInfo(FLAGS_shape, FLAGS_layout, FLAGS_b, inputInfo, reshape); + app_inputs_info = getInputsInfo(FLAGS_shape, FLAGS_layout, FLAGS_b, FLAGS_iscale, FLAGS_imean, inputInfo, reshape); if (reshape) { InferenceEngine::ICNNNetwork::InputShapes shapes = {}; for (auto& item : app_inputs_info) @@ -441,7 +441,7 @@ int main(int argc, char* argv[]) { slog::info << "Import network took " << duration_ms << " ms" << slog::endl; if (statistics) statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"import network time (ms)", duration_ms}}); - app_inputs_info = getInputsInfo(FLAGS_shape, FLAGS_layout, FLAGS_b, exeNetwork.GetInputsInfo()); + app_inputs_info = getInputsInfo(FLAGS_shape, FLAGS_layout, FLAGS_b, FLAGS_iscale, FLAGS_imean, exeNetwork.GetInputsInfo()); if (batchSize == 0) { batchSize = 1; } diff --git a/inference-engine/samples/benchmark_app/utils.cpp b/inference-engine/samples/benchmark_app/utils.cpp index 2b99c3b555c..66deb5bad31 100644 --- a/inference-engine/samples/benchmark_app/utils.cpp +++ b/inference-engine/samples/benchmark_app/utils.cpp @@ -88,6 +88,17 @@ std::vector split(const std::string& s, char delim) { return result; } +std::vector splitFloat(const std::string& s, 
char delim) { + std::vector result; + std::stringstream ss(s); + std::string item; + + while (getline(ss, item, delim)) { + result.push_back(std::stof(item)); + } + return result; +} + std::vector parseDevices(const std::string& device_string) { std::string comma_separated_devices = device_string; if (comma_separated_devices.find(":") != std::string::npos) { @@ -161,6 +172,44 @@ std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& sha return ss.str(); } +std::map> parseScaleOrMean(const std::string& scale_mean, const benchmark_app::InputsInfo& inputs_info) { + // Format: data:[255,255,255],info[255,255,255] + std::map> return_value; + + std::string search_string = scale_mean; + auto start_pos = search_string.find_first_of('['); + while (start_pos != std::string::npos) { + auto end_pos = search_string.find_first_of(']'); + if (end_pos == std::string::npos) + break; + auto input_name = search_string.substr(0, start_pos); + auto input_value_string = search_string.substr(start_pos + 1, end_pos - start_pos - 1); + auto input_value = splitFloat(input_value_string, ','); + + if (!input_name.empty()) { + if (inputs_info.count(input_name)) { + return_value[input_name] = input_value; + } + // ignore wrong input name + } else { + for (auto& item : inputs_info) { + if (item.second.isImage()) + return_value[item.first] = input_value; + } + search_string.clear(); + break; + } + search_string = search_string.substr(end_pos + 1); + if (search_string.empty() || search_string.front() != ',') + break; + search_string = search_string.substr(1); + start_pos = search_string.find_first_of('['); + } + if (!search_string.empty()) + throw std::logic_error("Can't parse input parameter string: " + scale_mean); + return return_value; +} + #ifdef USE_OPENCV void dump_config(const std::string& filename, const std::map>& config) { cv::FileStorage fs(filename, cv::FileStorage::WRITE); diff --git a/inference-engine/samples/benchmark_app/utils.hpp b/inference-engine/samples/benchmark_app/utils.hpp index 0abebefe9e0..4452556b3c4 100644 --- a/inference-engine/samples/benchmark_app/utils.hpp +++ b/inference-engine/samples/benchmark_app/utils.hpp @@ -13,6 +13,8 @@ struct InputInfo { InferenceEngine::Precision precision; InferenceEngine::SizeVector shape; std::string layout; + std::vector scale; + std::vector mean; bool isImage() const; bool isImageInfo() const; size_t getDimentionByLayout(char character) const; @@ -31,6 +33,7 @@ std::map parseNStreamsValuePerDevice(const std::vector std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes); size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info); std::vector split(const std::string& s, char delim); +std::map> parseScaleOrMean(const std::string& scale_mean, const benchmark_app::InputsInfo& inputs_info); template std::map parseInputParameters(const std::string parameter_string, const std::map& input_info) { @@ -65,9 +68,11 @@ std::map parseInputParameters(const std::string parame template benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size, - const std::map& input_info, bool& reshape_required) { + const std::string& scale_string, const std::string& mean_string, const std::map& input_info, + bool& reshape_required) { std::map shape_map = parseInputParameters(shape_string, input_info); std::map layout_map = parseInputParameters(layout_string, input_info); + reshape_required = false; benchmark_app::InputsInfo info_map; for (auto& item : 
input_info) { @@ -106,14 +111,33 @@ benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const s } info_map[name] = info; } + + // Update scale and mean + std::map> scale_map = parseScaleOrMean(scale_string, info_map); + std::map> mean_map = parseScaleOrMean(mean_string, info_map); + + for (auto& item : info_map) { + if (item.second.isImage()) { + item.second.scale.assign({1, 1, 1}); + item.second.mean.assign({0, 0, 0}); + + if (scale_map.count(item.first)) { + item.second.scale = scale_map.at(item.first); + } + if (mean_map.count(item.first)) { + item.second.mean = mean_map.at(item.first); + } + } + } + return info_map; } template benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size, - const std::map& input_info) { + const std::string& scale_string, const std::string& mean_string, const std::map& input_info) { bool reshape_required = false; - return getInputsInfo(shape_string, layout_string, batch_size, input_info, reshape_required); + return getInputsInfo(shape_string, layout_string, batch_size, scale_string, mean_string, input_info, reshape_required); } #ifdef USE_OPENCV diff --git a/inference-engine/samples/common/utils/include/samples/classification_results.h b/inference-engine/samples/common/utils/include/samples/classification_results.h index 7fd999d87b0..1a8ea4306c3 100644 --- a/inference-engine/samples/common/utils/include/samples/classification_results.h +++ b/inference-engine/samples/common/utils/include/samples/classification_results.h @@ -54,21 +54,27 @@ private: * @param output Vector of indexes for the top n places */ template - void topResults(unsigned int n, InferenceEngine::TBlob& input, std::vector& output) { - InferenceEngine::SizeVector dims = input.getTensorDesc().getDims(); + void topResults(unsigned int n, InferenceEngine::Blob::Ptr& input, std::vector& output) { + InferenceEngine::SizeVector dims = input->getTensorDesc().getDims(); size_t input_rank = dims.size(); if (!input_rank || !dims[0]) IE_THROW() << "Input blob has incorrect dimensions!"; size_t batchSize = dims[0]; - std::vector indexes(input.size() / batchSize); + std::vector indexes(input->size() / batchSize); - n = static_cast(std::min((size_t)n, input.size())); + n = static_cast(std::min((size_t)n, input->size())); output.resize(n * batchSize); + InferenceEngine::MemoryBlob::CPtr moutput = InferenceEngine::as(input); + if (!moutput) { + IE_THROW() << "Output blob should be inherited from MemoryBlob"; + } + // locked memory holder should be alive all time while access to its buffer happens + auto moutputHolder = moutput->rmap(); for (size_t i = 0; i < batchSize; i++) { - size_t offset = i * (input.size() / batchSize); - T* batchData = input.data(); + size_t offset = i * (input->size() / batchSize); + T* batchData = moutputHolder.as(); batchData += offset; std::iota(std::begin(indexes), std::end(indexes), 0); @@ -88,16 +94,15 @@ private: * @param input 1D blob that contains probabilities * @param output Vector of indexes for the top n places */ - void topResults(unsigned int n, InferenceEngine::Blob& input, std::vector& output) { + void topResults(unsigned int n, InferenceEngine::Blob::Ptr& input, std::vector& output) { #define TBLOB_TOP_RESULT(precision) \ case InferenceEngine::Precision::precision: { \ using myBlobType = InferenceEngine::PrecisionTrait::value_type; \ - InferenceEngine::TBlob& tblob = dynamic_cast&>(input); \ - topResults(n, tblob, output); \ + topResults(n, input, output); \ break; \ } - 
switch (input.getTensorDesc().getPrecision()) { + switch (input->getTensorDesc().getPrecision()) { TBLOB_TOP_RESULT(FP32); TBLOB_TOP_RESULT(FP64); TBLOB_TOP_RESULT(FP16); @@ -111,7 +116,7 @@ private: TBLOB_TOP_RESULT(U64); TBLOB_TOP_RESULT(I64); default: - IE_THROW() << "cannot locate blob for precision: " << input.getTensorDesc().getPrecision(); + IE_THROW() << "cannot locate blob for precision: " << input->getTensorDesc().getPrecision(); } #undef TBLOB_TOP_RESULT @@ -129,7 +134,7 @@ public: if (_imageNames.size() != _batchSize) { throw std::logic_error("Batch size should be equal to the number of images."); } - topResults(_nTop, *_outBlob, _results); + topResults(_nTop, _outBlob, _results); } /** @@ -146,18 +151,17 @@ public: std::wcout << std::endl << std::endl; printHeader(); + InferenceEngine::MemoryBlob::CPtr moutput = InferenceEngine::as(_outBlob); + auto moutputHolder = moutput->rmap(); for (size_t id = image_id * _nTop, cnt = 0; id < (image_id + 1) * _nTop; ++cnt, ++id) { std::cout.precision(7); /** Getting probability for resulting class **/ - InferenceEngine::MemoryBlob::CPtr moutput = InferenceEngine::as(_outBlob); if (!moutput) { throw std::logic_error("We expect _outBlob to be inherited from MemoryBlob in " "ClassificationResult::print, " "but by fact we were not able to cast _outBlob to MemoryBlob"); } // locked memory holder should be alive all time while access to its buffer happens - auto moutputHolder = moutput->rmap(); - const auto result = moutputHolder .as::value_type*>()[_results.at(id) + diff --git a/inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp b/inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp index ac92f7c2aa4..1e6ae59bf6f 100644 --- a/inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp +++ b/inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp @@ -109,7 +109,7 @@ public: return std::make_shared(new_args.at(0)); } - bool visit_attributes(ngraph::AttributeVisitor& visitor) override { + bool visit_attributes(ngraph::AttributeVisitor&) override { return true; } }; diff --git a/inference-engine/samples/ngraph_function_creation_sample/main.cpp b/inference-engine/samples/ngraph_function_creation_sample/main.cpp index 0855c961a63..6cb1e104305 100644 --- a/inference-engine/samples/ngraph_function_creation_sample/main.cpp +++ b/inference-engine/samples/ngraph_function_creation_sample/main.cpp @@ -108,7 +108,7 @@ TBlob::CPtr ReadWeights(std::string filepath) { std::shared_ptr createNgraphFunction() { TBlob::CPtr weightsPtr = ReadWeights(FLAGS_m); - if (weightsPtr->byteSize() != 1724336) + if (weightsPtr->byteSize() != 6897344) IE_THROW() << "Incorrect weights file. 
This sample works only with LeNet " "classification network."; diff --git a/inference-engine/src/auto_plugin/auto_plugin.cpp b/inference-engine/src/auto_plugin/auto_plugin.cpp index 94b6a8a8b71..75e80faa2b4 100644 --- a/inference-engine/src/auto_plugin/auto_plugin.cpp +++ b/inference-engine/src/auto_plugin/auto_plugin.cpp @@ -274,31 +274,108 @@ DeviceName AutoInferencePlugin::SelectDevice(const std::vector& meta } std::vector CPU; - std::vector GPU; + std::vector dGPU; + std::vector iGPU; + std::vector MYRIAD; + std::vector VPUX; for (auto& item : metaDevices) { if (item.find("CPU") == 0) { CPU.push_back(item); continue; } + if (item.find("MYRIAD") == 0) { + MYRIAD.push_back(item); + continue; + } + if (item.find("VPUX") == 0) { + VPUX.push_back(item); + continue; + } if (item.find("GPU") == 0) { - GPU.push_back(item); + auto gpuFullDeviceName = GetCore()->GetMetric(item, METRIC_KEY(FULL_DEVICE_NAME)).as(); + if (gpuFullDeviceName.find("iGPU") != std::string::npos) { + iGPU.push_back(item); + } else if (gpuFullDeviceName.find("dGPU") != std::string::npos) { + dGPU.push_back(item); + } continue; } } - if (CPU.empty() && GPU.empty()) { + if (CPU.empty() && dGPU.empty() && iGPU.empty() && MYRIAD.empty() && VPUX.empty()) { IE_THROW(NotFound) << "No available device found"; } - // Sort GPU by name: GPU.2 > GPU.1 > GPU.0 > GPU, so we always choose the GPU[0] as best device - std::sort(GPU.begin(), GPU.end(), [](const DeviceName& a, const DeviceName& b)->bool{return b < a;}); + // Priority of selecting device: dGPU > VPUX > iGPU > MYRIAD > CPU + if (!dGPU.empty()) { + for (auto&& item : dGPU) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!VPUX.empty()) { + for (auto&& item : VPUX) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!iGPU.empty()) { + for (auto&& item : iGPU) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!MYRIAD.empty()) { + for (auto&& item : MYRIAD) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); + if (supportNetwork != capability.end()) { + return item; + } + } + } - for (auto&& item : GPU) { - std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); - auto res = std::find(capability.begin(), capability.end(), networkPrecision); - if (res != capability.end()) { - return item; + // If network is FP32 but there is no device support FP32, offload FP32 network to device support FP16. 
+ if (networkPrecision == "FP32") { + if (!dGPU.empty()) { + for (auto&& item : dGPU) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16"); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!VPUX.empty()) { + for (auto&& item : VPUX) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16"); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!iGPU.empty()) { + for (auto&& item : iGPU) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16"); + if (supportNetwork != capability.end()) { + return item; + } + } + } else if (!MYRIAD.empty()) { + for (auto&& item : MYRIAD) { + std::vector capability = GetCore()->GetMetric(item, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16"); + if (supportNetwork != capability.end()) { + return item; + } + } } } diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.cpp b/inference-engine/src/cldnn_engine/cldnn_graph.cpp index 2b333a38ee9..53cefa30cf7 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_graph.cpp @@ -88,11 +88,11 @@ void CLDNNGraph::Build() { std::shared_ptr CLDNNGraph::BuildNetwork(std::shared_ptr program) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::BuildNetwork"); - auto network = std::make_shared(*program, m_stream_id); + auto network = std::make_shared(program, m_stream_id); if (!m_config.graph_dumps_dir.empty() && m_stream_id == 0) { static int net_id = 0; - auto steps_info = network->get_optimization_steps_info(); + auto steps_info = network->get_optimizer_passes_info(); size_t step_idx = 0; for (auto& step : steps_info) { CNNNetwork net(GetExecGraphInfoByPrimitivesInfo(step.second, true)); diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.h b/inference-engine/src/cldnn_engine/cldnn_graph.h index 5ce64712fef..feae62a03c2 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.h +++ b/inference-engine/src/cldnn_engine/cldnn_graph.h @@ -51,8 +51,10 @@ public: InferenceEngine::SizeVector GetOutputSize(std::string outName) const; std::string MapOutputName(std::string outName) const; std::string getName() const { return m_networkName; } + std::mutex& get_mutex() { return m_infer_mutex; } protected: + std::mutex m_infer_mutex; std::string m_networkName; Config m_config; diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp index bb923f373b9..9a55217975c 100644 --- a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp @@ -17,122 +17,36 @@ using namespace InferenceEngine; -namespace CLDNNPlugin { +namespace { const char fp32_suffix[] = "_fp32"; -const char str_not_allocated[] = "Input data was not allocated."; const char cannot_set_compound[] = "cannot set compound blob: supported only for input pre-processing"; const char wrong_nv12_blob[] = "NV12 input blob is expected for input with NV12 color format"; -const char unsupported_batched_blob[] = "Batched input blob is expected to contain nv12 blobs"; +const char 
unsupported_batched_blob[] = "Batched input blob is expected to contain NV12 blobs"; +const char str_input_not_allocated[] = "Input data was not allocated."; +const char str_output_not_allocated[] = "Output data was not allocated."; -Blob::Ptr CLDNNInferRequest::createInputBlob(const TensorDesc& desc, uint8_t* mem_ptr) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::createInputBlob"); - const Precision p = desc.getPrecision(); - - switch (p) { - case Precision::FP32: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::FP16: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::I16: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::U16: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::I32: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::I64: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::I8: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::U8: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::BOOL: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - default: - IE_THROW() << "The plugin does not support input " << p.name() << " precision"; +template +void copyToFloat(float* dst, const InferenceEngine::Blob* src) { + if (!dst) { + return; } -} - -Blob::Ptr CLDNNInferRequest::createOutputBlob(const TensorDesc& desc, uint8_t* mem_ptr) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::createOutputBlob"); - const Precision p = desc.getPrecision(); - - switch (p) { - case Precision::FP32: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::FP16: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::I32: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - case Precision::I64: - if (mem_ptr != nullptr) - return make_shared_blob(desc, reinterpret_cast(mem_ptr)); - else - return make_shared_blob(desc); - default: - IE_THROW() << "The plugin does not support output " << p.name() << " precision"; + auto t_blob = dynamic_cast*>(src); + if (!t_blob) { + IE_THROW() << "input type is " << src->getTensorDesc().getPrecision() << " but input is not " + << typeid(T).name(); } -} -void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory::ptr inputMem) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_attach"); - auto impl = getContextImpl(m_graph->GetContext()); - impl->acquire_lock(); - - auto mem_itr = inputsMemory.find(name); - - if (mem_itr != inputsMemory.end()) - mem_itr->second = inputMem; - else - inputsMemory.insert({ name, inputMem }); - - 
impl->release_lock(); -} - -void CLDNNInferRequest::input_alloc(cldnn::primitive_id name, const cldnn::layout& layout) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_alloc"); - cldnn::memory::ptr input_mem = m_graph->GetEngine()->allocate_memory(layout); - input_attach(name, input_mem); + const T* srcPtr = t_blob->readOnly(); + if (!srcPtr) { + IE_THROW(NotAllocated) << str_input_not_allocated; + } + for (size_t i = 0; i < t_blob->size(); i++) + dst[i] = srcPtr[i]; } template -void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi, cldnn::stream& stream) { +void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, CLDNNPlugin::buf_info* bi, cldnn::stream& stream) { size_t n = (bi == nullptr) ? dst->size() : bi->buf_size; size_t offset = (bi == nullptr) ? 0 : bi->buf_offset; @@ -169,80 +83,15 @@ void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi, } } -void CLDNNInferRequest::copyOutputData(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyOutputData"); - auto& stream = m_graph->GetNetwork()->get_stream(); - switch (dst->getTensorDesc().getPrecision()) { - case Precision::FP32: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::FP16: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::I32: copyResultToOutputBlob(src, dst, bi, stream); break; - case Precision::I64: copyResultToOutputBlob(src, dst, bi, stream); break; - default: IE_THROW(NotImplemented) << "The plugin does not support output " << dst->getTensorDesc().getPrecision() << " precision"; +inline void checkAlloc(const Blob::Ptr& blob, const std::string& err_str) { + bool not_allocated = false; + if (!blob->is()) { + not_allocated = (blob->buffer() == nullptr); + } else { + not_allocated = !CLDNNPlugin::getBlobImpl(blob->as())->is_allocated(); } -} - -void CLDNNInferRequest::copyInputData(std::shared_ptr network, - const cldnn::primitive_id &inputName, - const cldnn::layout& inputLayout, - const Blob &inputBlob, buf_info* bi) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyInputData"); - - size_t offset = (bi == nullptr) ? 
0 : bi->buf_offset; - - cldnn::primitive_id internalName = "parameter:" + inputName; - auto locked = inputBlob.cbuffer(); - switch (inputBlob.getTensorDesc().getPrecision()) { - case Precision::FP32: { - float* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::I32: { - int32_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::I64: { - int64_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::FP16: { - uint16_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::I8: { - int8_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::U8: { - uint8_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - case Precision::BOOL: { - uint8_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); - break; - } - default: - IE_THROW() << "The plugin does not support input " << inputBlob.getTensorDesc().getPrecision() << " precision"; - } -} - -void checkInputBlobNV12(const NV12Blob *nv12_ptr) { - auto y_ptr = nv12_ptr->y()->as(); - - // if the blobs are not remote, check their size - if (!y_ptr) { - if (nv12_ptr->y()->buffer() == nullptr) IE_THROW(NotAllocated) << str_not_allocated; - } - - auto uv_ptr = nv12_ptr->uv()->as(); - if (!uv_ptr) { - if (nv12_ptr->uv()->buffer() == nullptr) IE_THROW(NotAllocated) << str_not_allocated; + if (not_allocated) { + IE_THROW(NotAllocated) << err_str; } } @@ -260,17 +109,19 @@ void checkInputBlob(const Blob::Ptr &blob, const std::string strNotMatched("The input blob size is not equal to the network input size"); if (!blob) { - IE_THROW() << str_not_allocated; + IE_THROW(NotAllocated) << str_input_not_allocated; } if (ColorFormat::NV12 == foundInput->getPreProcess().getColorFormat() && nv12_two_inputs) { if (auto nv12_ptr = blob->as()) { - checkInputBlobNV12(nv12_ptr); + checkAlloc(nv12_ptr->y(), str_input_not_allocated); + checkAlloc(nv12_ptr->uv(), str_input_not_allocated); } else if (auto batched_ptr = blob->as()) { for (auto i = 0; i < batched_ptr->size(); i++) { auto nv12_ptr = getNV12BlobOrException(batched_ptr, i); - checkInputBlobNV12(nv12_ptr); + checkAlloc(nv12_ptr->y(), str_input_not_allocated); + checkAlloc(nv12_ptr->uv(), str_input_not_allocated); } } else { IE_THROW(ParameterMismatch) << wrong_nv12_blob; @@ -287,20 +138,17 @@ void checkInputBlob(const Blob::Ptr &blob, IE_THROW() << strNotMatched + ": got " << blob->size() << " expecting " << refSize; } - if (!blob->is()) { - if (blob->buffer() == nullptr) IE_THROW() << str_not_allocated; - } + checkAlloc(blob, str_input_not_allocated); } } void checkOutputBlob(const Blob::Ptr &blob, const std::string &name, const DataPtr foundOutput) { - const std::string strNotAllocated("Output data was not allocated."); const std::string strNotMatched("The output blob size is not equal to the network 
output size"); if (!blob) { - IE_THROW() << strNotAllocated; + IE_THROW(NotAllocated) << str_output_not_allocated; } SizeVector dims = foundOutput->getTensorDesc().getDims(); size_t refSize = foundOutput->getTensorDesc().getLayout() != SCALAR @@ -311,43 +159,17 @@ void checkOutputBlob(const Blob::Ptr &blob, IE_THROW() << strNotMatched + ": got " << blob->size() << " expecting " << refSize; } - if (!blob->is()) { - if (blob->buffer() == nullptr) IE_THROW() << strNotAllocated; - } + checkAlloc(blob, str_output_not_allocated); } -void CLDNNInferRequest::checkBlobs() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::checkBlobs"); - for (auto const &input : _inputs) { - InputInfo::Ptr foundInput = nullptr; - auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs), - [&](const std::pair &pair) { - return pair.first == input.first; - }); - if (foundInputPair != std::end(_networkInputs)) { - foundInput = foundInputPair->second; - } else { - IE_THROW(NotFound) - << "Failed to find input with name: \'" << input.first << "\'"; - } - checkInputBlob(input.second, input.first, foundInput, m_graph->getConfig().nv12_two_inputs); - } - for (auto const &output : _outputs) { - DataPtr foundOutput; - auto foundOutputPair = std::find_if(std::begin(_networkOutputs), std::end(_networkOutputs), - [&](const std::pair &pair) { - return pair.first == output.first; - }); - if (foundOutputPair != std::end(_networkOutputs)) { - foundOutput = foundOutputPair->second; - } else { - IE_THROW(NotFound) - << "Failed to find output with name: \'" << output.first << "\'"; - } - checkOutputBlob(output.second, output.first, foundOutput); - } -} +} // namespace +namespace CLDNNPlugin { + + +// ----------------------------------------------------------------------------------------- // +// ---------------------------- IE API impl ------------------------------------------------ // +// ----------------------------------------------------------------------------------------- // Blob::Ptr CLDNNInferRequest::GetBlob(const std::string& name) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetBlob"); Blob::Ptr data; @@ -371,7 +193,7 @@ Blob::Ptr CLDNNInferRequest::GetBlob(const std::string& name) { return data; } -void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data) { +void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetBlob"); // perform all common checks first @@ -397,69 +219,73 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data) : foundOutput->getTensorDesc(); if (desc.getPrecision() != blobDesc.getPrecision()) { - IE_THROW(ParameterMismatch) - << "Failed to set Blob with precision not corresponding to user " - << (is_input ? "input" : "output") << " precision"; + IE_THROW(ParameterMismatch) << "Failed to set Blob with precision not corresponding to user " + << (is_input ? 
"input" : "output") << " precision"; } auto remote_ptr = data->as(); bool is_remote = remote_ptr != nullptr; if (is_remote) { auto impl = getBlobImpl(remote_ptr); - impl->allocate_if_needed(); + impl->allocate(); } - if (is_input) { - cldnn::primitive_id internalName(name); - if (is_remote) { - auto inputMem = getBlobImpl(remote_ptr)->getMemory(); - input_attach(internalName, inputMem); + _deviceInputs[name] = data; _inputs[name] = data; - } else if (compoundBlobPassed) { + } else { + auto nv12_ptr = data->as(); + auto batched_ptr = data->as(); + bool is_batched = batched_ptr != nullptr; + bool is_nv12 = nv12_ptr != nullptr; + int expected_batch = is_batched ? desc.getDims()[0] : 1; if (ColorFormat::NV12 == foundInput->getPreProcess().getColorFormat() && m_graph->getConfig().nv12_two_inputs) { // try extracting Y and UV remote blobs from it // and put them into appropriate network inputs // that should then go into biplanar NV12 reorder - auto nv12_ptr = data->as(); - auto batched_ptr = data->as(); - if (nv12_ptr != nullptr || batched_ptr != nullptr) { - int num_blobs = batched_ptr != nullptr ? batched_ptr->size() : 1; - - for (auto i = 0; i < num_blobs; i++) { - if (batched_ptr != nullptr) - nv12_ptr = getNV12BlobOrException(batched_ptr, i); + if (is_nv12 || is_batched) { + int num_blobs = is_batched ? batched_ptr->size() : 1; + for (auto i = 0; i < expected_batch; i++) { + std::string y_name = name + "_Y" + std::to_string(i); + std::string uv_name = name + "_UV" + std::to_string(i); + if (is_batched) { + int idx = i < num_blobs ? i : num_blobs-1; + nv12_ptr = getNV12BlobOrException(batched_ptr, idx); + } auto y_ptr = nv12_ptr->y()->as(); if (y_ptr) { auto y_impl = getBlobImpl(y_ptr); - y_impl->allocate_if_needed(); - input_attach(internalName + "_Y" + std::to_string(i), y_impl->getMemory()); + y_impl->allocate(); + _deviceInputs[y_name] = nv12_ptr->y(); is_remote = true; } auto uv_ptr = nv12_ptr->uv()->as(); if (uv_ptr) { auto uv_impl = getBlobImpl(uv_ptr); - uv_impl->allocate_if_needed(); - input_attach(internalName + "_UV" + std::to_string(i), uv_impl->getMemory()); + uv_impl->allocate(); + _deviceInputs[uv_name] = nv12_ptr->uv(); is_remote = true; } } - } else { - IE_THROW(ParameterMismatch) << wrong_nv12_blob; } - - if (is_remote) _inputs[name] = data; } + if (is_remote) + _inputs[name] = data; } if (!is_remote) { if (preProcessingRequired(foundInput, data)) { // Stores the given blob as ROI blob. It will be used to fill in network input // during pre-processing + if (_inputs[name]->is()) { + Blob::Ptr inputHostBlob = create_input_host_blob(desc); + inputHostBlob->allocate(); + _inputs[name] = inputHostBlob; + } _preProcData[name] = CreatePreprocDataHelper(); _preProcData[name]->isApplicable(data, _inputs[name]); _preProcData[name]->setRoiBlob(data); @@ -467,17 +293,16 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data) if (compoundBlobPassed) { IE_THROW(NotImplemented) << cannot_set_compound; } - size_t blobSize = desc.getLayout() != SCALAR ? 
details::product(desc.getDims()) : 1; if (dataSize != blobSize) { IE_THROW() << "Input blob size is not equal network input size (" - << dataSize << "!=" << blobSize << ")."; + << dataSize << "!=" << blobSize << ")."; } if (data->buffer() == nullptr) - IE_THROW() << str_not_allocated << " Input name: \'" << name << "\'"; + IE_THROW(NotAllocated) << str_input_not_allocated << " Input name: \'" << name << "\'"; _inputs[name] = data; } } @@ -487,148 +312,49 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data) } if (is_remote) { - std::string outputID = m_graph->MapOutputName(name); - auto impl = getBlobImpl(remote_ptr); - m_graph->GetNetwork()->set_output_memory(outputID, impl->getMemory()); + _deviceOutputs[name] = data; } else { size_t outputSize = desc.getLayout() != SCALAR ? details::product(desc.getDims()) : 1; if (dataSize != outputSize) { IE_THROW() << "Output blob size is not equal network output size (" << dataSize - << "!=" << outputSize << ")."; + << "!=" << outputSize << ")."; } if (data->buffer() == nullptr) - IE_THROW() << str_not_allocated << " Input name: \'" << name << "\'"; + IE_THROW(NotAllocated) << str_input_not_allocated << " Input name: \'" << name << "\'"; } _outputs[name] = data; } } -void CLDNNInferRequest::AllocateInputs() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputs"); - auto inputLayouts = m_graph->GetInputLayouts(); - auto& stream = m_graph->GetNetwork()->get_stream(); - // allocate inputs - for (auto& ni : _networkInputs) { - std::string name = ni.first; - const TensorDesc& desc = ni.second->getTensorDesc(); - - if (ColorFormat::NV12 == ni.second->getPreProcess().getColorFormat() && - m_graph->getConfig().nv12_two_inputs) { - std::vector blobs; - for (auto i = 0; i < desc.getDims()[0]; i++) { - cldnn::primitive_id YName(name + "_Y" + std::to_string(i)); - cldnn::primitive_id UVName(name + "_UV" + std::to_string(i)); - - if (inputLayouts.find(YName) == inputLayouts.end()) { - IE_THROW(ParameterMismatch) << "Input layout for " << YName << " is not found"; - } - if (inputLayouts.find(UVName) == inputLayouts.end()) { - IE_THROW(ParameterMismatch) << "Input layout for " << YName << " is not found"; - } - input_alloc(YName, inputLayouts.at(YName)); - input_alloc(UVName, inputLayouts.at(UVName)); - - size_t height = desc.getDims()[2], width = desc.getDims()[3]; - cldnn::mem_lock input_mem_ptr_Y{inputsMemory.at(YName), stream}; - TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC); - auto blobY = createInputBlob(ydesc, input_mem_ptr_Y.data()); - - cldnn::mem_lock input_mem_ptr_UV{ inputsMemory.at(UVName), stream }; - TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC); - auto blobUV = createInputBlob(uvdesc, input_mem_ptr_UV.data()); - - blobs.push_back(make_shared_blob(blobY, blobUV)); - } - _inputs[name] = desc.getDims()[0] == 1 ? 
blobs[0] : make_shared_blob(blobs); +void CLDNNInferRequest::checkBlobs() { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::checkBlobs"); + for (auto const &input : _inputs) { + InputInfo::Ptr foundInput = nullptr; + auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs), + [&](const std::pair &pair) { + return pair.first == input.first; + }); + if (foundInputPair != std::end(_networkInputs)) { + foundInput = foundInputPair->second; } else { - if (inputLayouts.find(name) == inputLayouts.end()) { - IE_THROW() << "Input layout for " << name << " is not found"; - } - cldnn::layout layout = inputLayouts.at(name); - input_alloc(name, layout); - cldnn::mem_lock mem_ptr{inputsMemory.at(name), stream}; - _inputs[name] = createInputBlob(desc, mem_ptr.data()); - - if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { - cldnn::layout layout_fp32 = layout; - layout_fp32.data_type = cldnn::data_types::f32; - input_alloc(name + fp32_suffix, layout_fp32); - } + IE_THROW(NotFound) << "Failed to find input with name: \'" << input.first << "\'"; } + checkInputBlob(input.second, input.first, foundInput, m_graph->getConfig().nv12_two_inputs); } -} - -void CLDNNInferRequest::AllocateInputsDyn() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputsDyn"); - // allocate inputs - for (auto &input : m_graph->GetInputLayouts()) { - InputInfo::Ptr ni = _networkInputs.at(input.first); - TensorDesc desc = ni->getTensorDesc(); - SizeVector& dims = desc.getDims(); - - if (!dims.empty()) { - *dims.begin() = static_cast(m_graph->GetMaxDynamicBatchSize()); + for (auto const &output : _outputs) { + DataPtr foundOutput = nullptr; + auto foundOutputPair = std::find_if(std::begin(_networkOutputs), std::end(_networkOutputs), + [&](const std::pair &pair) { + return pair.first == output.first; + }); + if (foundOutputPair != std::end(_networkOutputs)) { + foundOutput = foundOutputPair->second; } else { - IE_THROW() << "Empty dimensions for input blob " << input.first; + IE_THROW(NotFound) << "Failed to find output with name: \'" << output.first << "\'"; } - - Blob::Ptr inputBlob = createInputBlob(desc); - if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { - desc.setPrecision(Precision::FP32); - auto fp32inputBlob = InferenceEngine::make_shared_blob(desc); - fp32inputBlob->allocate(); - _inputs[input.first + fp32_suffix] = fp32inputBlob; - } - inputBlob->allocate(); - _inputs[input.first] = inputBlob; - } -} - -void CLDNNInferRequest::AllocateOutputs() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateOutputs"); - // allocate outputs - bool can_reuse_internal_mem = !m_useStreams; - for (auto& no : _networkOutputs) { - std::string outputID = m_graph->MapOutputName(no.first); - cldnn::memory::ptr output_mem = m_graph->GetNetwork()->get_output_memory(outputID); - cldnn::mem_lock output_mem_ptr{output_mem, m_graph->GetNetwork()->get_stream()}; - if (output_mem_ptr.data() == nullptr) { - IE_THROW() << "Empty output memory for primitive " << outputID; - } - - DataPtr oi = no.second; - const TensorDesc& desc = oi->getTensorDesc(); - - if (can_reuse_internal_mem) { - _outputs[no.first] = createOutputBlob(desc, output_mem_ptr.data()); - } else { - Blob::Ptr outputBlob = createOutputBlob(desc); - outputBlob->allocate(); - _outputs[no.first] = outputBlob; - } - outputsMap[no.first] = outputID; - } -} - -void CLDNNInferRequest::AllocateOutputsDyn() { - 
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateOutputsDyn"); - // allocate outputs - for (auto& no : _networkOutputs) { - DataPtr oi = no.second; - TensorDesc desc = oi->getTensorDesc(); - SizeVector& dims = desc.getDims(); - - if (!dims.empty()) { - *dims.begin() = static_cast(m_graph->GetMaxDynamicBatchSize()); - } else { - IE_THROW() << "Empty dimensions for output blob " << no.first; - } - - Blob::Ptr outputBlob = createOutputBlob(desc); - outputBlob->allocate(); - _outputs[no.first] = outputBlob; + checkOutputBlob(output.second, output.first, foundOutput); } } @@ -642,11 +368,11 @@ void CLDNNInferRequest::SetGraph(std::shared_ptr graph) if (m_graph->GetMaxDynamicBatchSize() > 1) { SetBatch(m_graph->GetMaxDynamicBatchSize()); - AllocateInputsDyn(); - AllocateOutputsDyn(); + allocate_inputs_dynamic(); + allocate_outputs_dynamic(); } else { - AllocateInputs(); - AllocateOutputs(); + allocate_inputs(); + allocate_outputs(); } } @@ -728,40 +454,272 @@ CLDNNInferRequest::CLDNNInferRequest(InputsDataMap networkInputs, OutputsDataMap streamExecutor = dynamic_cast(execNetwork->m_taskExecutor.get()); } -void CLDNNInferRequest::execAndParse() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParse"); - auto networkOutputs = m_graph->GetNetwork()->execute(); +// ----------------------------------------------------------------------------------------- // +// ---------------------------- internal utils --------- ----------------------------------- // +// ----------------------------------------------------------------------------------------- // + +Blob::Ptr CLDNNInferRequest::create_input_host_blob(const TensorDesc& desc, uint8_t* mem_ptr) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::create_input_host_blob"); + const Precision& p = desc.getPrecision(); + + switch (p) { + case Precision::FP32: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::FP16: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::I16: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::U16: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::I32: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::I64: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::I8: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::U8: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::BOOL: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + default: + IE_THROW(NotImplemented) << "The plugin does not support input " << p.name() << " precision"; + } +} + +Blob::Ptr CLDNNInferRequest::create_output_host_blob(const TensorDesc& desc, uint8_t* mem_ptr) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::create_output_host_blob"); + const Precision& p = 
desc.getPrecision(); + + switch (p) { + case Precision::FP32: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::FP16: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::I32: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::I64: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + default: + IE_THROW() << "The plugin does not support output " << p.name() << " precision"; + } +} + +void CLDNNInferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copy_output_data"); auto& stream = m_graph->GetNetwork()->get_stream(); + switch (dst->getTensorDesc().getPrecision()) { + case Precision::FP32: copyResultToOutputBlob(src, dst, bi, stream); break; + case Precision::FP16: copyResultToOutputBlob(src, dst, bi, stream); break; + case Precision::I32: copyResultToOutputBlob(src, dst, bi, stream); break; + case Precision::I64: copyResultToOutputBlob(src, dst, bi, stream); break; + default: IE_THROW(NotImplemented) << "The plugin does not support output " << dst->getTensorDesc().getPrecision() << " precision"; + } +} + +void CLDNNInferRequest::copy_input_data(std::shared_ptr network, + const cldnn::primitive_id &inputName, + const cldnn::layout& inputLayout, + const Blob &inputBlob, buf_info* bi) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copy_input_data"); + + size_t offset = (bi == nullptr) ? 
0 : bi->buf_offset; + + cldnn::primitive_id internalName = "parameter:" + inputName; + auto locked = inputBlob.cbuffer(); + switch (inputBlob.getTensorDesc().getPrecision()) { + case Precision::FP32: { + float* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + case Precision::I32: { + int32_t* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + case Precision::I64: { + int64_t* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + case Precision::FP16: { + uint16_t* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + case Precision::I8: { + int8_t* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + case Precision::U8: { + uint8_t* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + case Precision::BOOL: { + uint8_t* blob_ptr = const_cast(locked.as()) + offset; + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); + break; + } + default: + IE_THROW() << "The plugin does not support input " << inputBlob.getTensorDesc().getPrecision() << " precision"; + } +} + +void CLDNNInferRequest::allocate_inputs() { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_inputs"); + auto inputLayouts = m_graph->GetInputLayouts(); + // allocate inputs + for (auto& ni : _networkInputs) { + std::string name = ni.first; + const TensorDesc& desc = ni.second->getTensorDesc(); + + if (ColorFormat::NV12 == ni.second->getPreProcess().getColorFormat() && + m_graph->getConfig().nv12_two_inputs) { + } else { + auto litr = inputLayouts.find(name); + if (litr == inputLayouts.end()) { + IE_THROW() << "Input layout for " << name << " is not found"; + } + + if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { + TensorDesc desc_fp32 = desc; + desc_fp32.setPrecision(Precision::FP32); + auto blobPtr = create_device_blob(desc_fp32, litr->second); + _deviceInputs[name] = blobPtr; + Blob::Ptr inputBlob = create_input_host_blob(desc); + inputBlob->allocate(); + _inputs[name] = inputBlob; + } else { + auto blobPtr = create_device_blob(desc, litr->second); + _deviceInputs[name] = blobPtr; + _inputs[name] = blobPtr; + } + } + } +} + +void CLDNNInferRequest::allocate_inputs_dynamic() { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_inputs_dynamic"); + // allocate inputs + for (auto &input : m_graph->GetInputLayouts()) { + InputInfo::Ptr ni = _networkInputs.at(input.first); + TensorDesc desc = ni->getTensorDesc(); + SizeVector& dims = desc.getDims(); + + if (!dims.empty()) { + *dims.begin() = static_cast(m_graph->GetMaxDynamicBatchSize()); + } else { + IE_THROW() << "Empty dimensions for input blob " << input.first; + } + + Blob::Ptr inputBlob = create_input_host_blob(desc); + if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { + desc.setPrecision(Precision::FP32); + auto fp32inputBlob = InferenceEngine::make_shared_blob(desc); 
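create_input_host_blob and create_output_host_blob above dispatch on the tensor precision to build a typed host blob, either wrapping caller-provided memory or allocating a fresh buffer. A minimal standalone sketch of that dispatch pattern, using simple stand-in types instead of the real InferenceEngine blob API and only a few of the precisions handled above:

#include <cstddef>
#include <cstdint>
#include <memory>
#include <stdexcept>
#include <vector>

// Stand-ins for InferenceEngine::Precision / Blob, just enough to show the dispatch.
enum class Precision { FP32, I32, I64, U8 };

struct Blob {
    virtual ~Blob() = default;
    virtual size_t byte_size() const = 0;
};

template <typename T>
struct TypedBlob : Blob {
    explicit TypedBlob(size_t count) : owned(count), data(owned.data()), count(count) {}
    TypedBlob(size_t count, T* external) : data(external), count(count) {}
    std::vector<T> owned;  // empty when wrapping external, caller-owned memory
    T* data = nullptr;
    size_t count = 0;
    size_t byte_size() const override { return count * sizeof(T); }
};

template <typename T>
std::shared_ptr<Blob> make_typed_blob(size_t count, uint8_t* mem_ptr) {
    // Mirrors the "wrap external pointer if given, otherwise allocate" branches above.
    if (mem_ptr != nullptr)
        return std::make_shared<TypedBlob<T>>(count, reinterpret_cast<T*>(mem_ptr));
    return std::make_shared<TypedBlob<T>>(count);
}

std::shared_ptr<Blob> create_host_blob(Precision p, size_t count, uint8_t* mem_ptr = nullptr) {
    switch (p) {
    case Precision::FP32: return make_typed_blob<float>(count, mem_ptr);
    case Precision::I32:  return make_typed_blob<int32_t>(count, mem_ptr);
    case Precision::I64:  return make_typed_blob<int64_t>(count, mem_ptr);
    case Precision::U8:   return make_typed_blob<uint8_t>(count, mem_ptr);
    }
    throw std::runtime_error("unsupported precision");
}

int main() {
    auto allocated = create_host_blob(Precision::FP32, 16);                  // owns its buffer
    float external[16] = {};
    auto wrapped = create_host_blob(Precision::FP32, 16,
                                    reinterpret_cast<uint8_t*>(external));   // wraps user memory
    return allocated->byte_size() == wrapped->byte_size() ? 0 : 1;
}

The real helpers cover the full precision list shown above (FP32/FP16/I16/U16/I32/I64/I8/U8/BOOL for inputs, FP32/FP16/I32/I64 for outputs) and throw for anything else.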
+ fp32inputBlob->allocate(); + _inputs[input.first + fp32_suffix] = fp32inputBlob; + } + inputBlob->allocate(); + _inputs[input.first] = inputBlob; + } +} + +void CLDNNInferRequest::allocate_outputs() { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_outputs"); + // allocate outputs + for (auto& no : _networkOutputs) { + std::string outputID = m_graph->MapOutputName(no.first); + const cldnn::layout output_layout = m_graph->GetNetwork()->get_output_memory(outputID)->get_layout(); + const TensorDesc& desc = no.second->getTensorDesc(); + + auto blobPtr = create_device_blob(desc, output_layout); + _deviceOutputs[no.first] = blobPtr; + _outputs[no.first] = blobPtr; + outputsMap[no.first] = outputID; + } +} + +void CLDNNInferRequest::allocate_outputs_dynamic() { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_outputs_dynamic"); + // allocate outputs + for (auto& no : _networkOutputs) { + DataPtr oi = no.second; + TensorDesc desc = oi->getTensorDesc(); + SizeVector& dims = desc.getDims(); + + if (!dims.empty()) { + *dims.begin() = static_cast(m_graph->GetMaxDynamicBatchSize()); + } else { + IE_THROW() << "Empty dimensions for output blob " << no.first; + } + + Blob::Ptr outputBlob = create_output_host_blob(desc); + outputBlob->allocate(); + _outputs[no.first] = outputBlob; + } +} + +void CLDNNInferRequest::exec_and_parse(const std::vector& dependencies) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParse"); + auto networkOutputs = m_graph->GetNetwork()->execute(dependencies); // Collect outputs as requested by the model for (auto& no : _networkOutputs) { Blob::Ptr bptr = _outputs[no.first]; - - std::string outputID = outputsMap[no.first]; + std::string outputID = outputsMap.at(no.first); auto outputMemory = networkOutputs.at(outputID).get_memory(); // mapping remote blobs not needed - // let the user take care of them explicitly if (!bptr->is()) { - cldnn::mem_lock out_ptr{outputMemory, stream}; - auto blob_ptr = bptr->buffer().as(); - - // If Async API is used, copy of output blobs is not needed, unless SetBlob function was called. - // But in the case when old API is used we have to copy data to memory provided by user. 
- if (blob_ptr != out_ptr.data()) { - copyOutputData(outputMemory, bptr); - } + copy_output_data(outputMemory, bptr); } } - - // finally collect profiling info - if (m_useProfiling) { - m_graph->UpdatePerfStatistics(); - } } -void CLDNNInferRequest::execAndParseDyn() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParseDyn"); +void CLDNNInferRequest::exec_and_parse_dynamic() { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::exec_and_parse_dynamic"); std::vector> networkOutputs(m_graph->GetNetworksCount()); // set up exection and put all graphs into driver queue @@ -769,6 +727,14 @@ void CLDNNInferRequest::execAndParseDyn() { unsigned int mask = 1 << nb; if (m_curBatch & mask) { + for (auto& item : _inputs) { + const cldnn::primitive_id& inputName = item.first; + const Blob::Ptr inputBlob = item.second; + + auto inputLayout = m_graph->GetInputLayouts().at(inputName); + inputLayout.size.batch[0] = mask; + copy_input_data(m_graph->GetNetwork(nb), inputName, inputLayout, *inputBlob, &batchInputs[inputName][nb]); + } networkOutputs[nb] = m_graph->GetNetwork(nb)->execute(); } } @@ -783,7 +749,7 @@ void CLDNNInferRequest::execAndParseDyn() { auto outputMemory = networkOutputs[nb].at(outputID).get_memory(); Blob::Ptr bptr = _outputs[no.first]; - copyOutputData(outputMemory, bptr, &batchOutputs[no.first][nb]); + copy_output_data(outputMemory, bptr, &batchOutputs[no.first][nb]); } } } @@ -799,38 +765,61 @@ void CLDNNInferRequest::InferImpl() { // execute input pre-processing. execDataPreprocessing(_inputs, true); // "true" stands for serial preprocessing in case of OpenMP - for (auto &item : _inputs) { - std::string name = item.first; - Blob::Ptr inputBlob = item.second; + if (m_graph->GetMaxDynamicBatchSize() > 1) { + exec_and_parse_dynamic(); + return; + } + + { + // try locking stream infer mutex + const std::lock_guard lock(m_graph->get_mutex()); + + // set input and output memory from request blob maps + // into the network object primitives + std::vector dependencies; + for (auto& item : _inputs) { + std::string inputName = item.first; + Blob::Ptr& inputBlob = item.second; - if (m_graph->GetMaxDynamicBatchSize() > 1) { - PrepareInputDyn(name, *inputBlob); - } else { auto nv12_ptr = inputBlob->as(); auto batched_ptr = inputBlob->as(); + bool is_batched = batched_ptr != nullptr; + bool is_nv12 = nv12_ptr != nullptr; - if (nv12_ptr != nullptr || batched_ptr != nullptr) { - // special case for NV12 input blob - int num_blobs = batched_ptr != nullptr ? batched_ptr->size() : 1; - for (auto i = 0; i < num_blobs; i++) { - if (batched_ptr != nullptr) - nv12_ptr = getNV12BlobOrException(batched_ptr, i); - - PrepareInput(name + "_Y" + std::to_string(i), *nv12_ptr->y()); - PrepareInput(name + "_UV" + std::to_string(i), *nv12_ptr->uv()); + if (is_nv12 || is_batched) { + int num_blobs = is_batched ? batched_ptr->size() : 1; + int expected_batch = is_batched + ? _networkInputs.at(inputName)->getTensorDesc().getDims()[0] + : 1; + for (auto i = 0; i < expected_batch; i++) { + std::string y_name = inputName + "_Y" + std::to_string(i); + std::string uv_name = inputName + "_UV" + std::to_string(i); + if (is_batched) { + int idx = i < num_blobs ? 
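exec_and_parse_dynamic() above selects which pre-built network to run by testing bits of m_curBatch: network nb handles a chunk of 1 << nb samples, so a requested batch appears to be covered by its power-of-two components. A small standalone sketch of that scheduling, with an assumed network count and batch value:

#include <cstdio>

int main() {
    const unsigned networks_count = 4;  // networks built for batch chunks 1, 2, 4, 8
    const int cur_batch = 5;            // requested dynamic batch (illustrative)

    for (unsigned nb = 0; nb < networks_count; ++nb) {
        const unsigned mask = 1u << nb;
        if (cur_batch & mask) {
            // In the plugin this is where inputs are copied with
            // inputLayout.size.batch[0] = mask and GetNetwork(nb)->execute() is queued.
            std::printf("network %u runs a chunk of %u samples\n", nb, mask);
        }
    }
    // For cur_batch = 5 the chunks are 1 + 4 samples, i.e. networks 0 and 2 run.
    return 0;
}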
i : num_blobs - 1; + nv12_ptr = getNV12BlobOrException(batched_ptr, idx); + } + prepare_input(y_name, nv12_ptr->y(), dependencies); + prepare_input(uv_name, nv12_ptr->uv(), dependencies); } } else { // regular blob - PrepareInput(name, *inputBlob); + prepare_input(inputName, inputBlob, dependencies); } } - } - // The actual inference - if (m_graph->GetMaxDynamicBatchSize() > 1) { - execAndParseDyn(); - } else { - execAndParse(); + for (auto& item : _outputs) { + std::string outputName = item.first; + Blob::Ptr& outputBlob = item.second; + prepare_output(outputName, outputBlob); + } + + // The actual inference + exec_and_parse(dependencies); + + // finally collect profiling info + if (m_useProfiling) { + m_graph->UpdatePerfStatistics(); + } } } @@ -843,101 +832,83 @@ std::map CLDNNInferRequest::GetPerforma } } -namespace { - -template -void copyToFloat(float* dst, const InferenceEngine::Blob* src) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "copyToFloat"); - if (!dst) { - return; - } - const InferenceEngine::TBlob* t_blob = dynamic_cast*>(src); - if (t_blob == nullptr) { - IE_THROW() << "input type is " << src->getTensorDesc().getPrecision() << " but input is not " - << typeid(T).name(); - } - - const T* srcPtr = t_blob->readOnly(); - if (srcPtr == nullptr) { - IE_THROW() << "Input data was not allocated."; - } - for (size_t i = 0; i < t_blob->size(); i++) dst[i] = srcPtr[i]; -} - -} // namespace - -void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const Blob &inputBlob) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::PrepareInput"); - // Get input layout - if (m_graph->GetInputLayouts().find(inputName) == m_graph->GetInputLayouts().end()) { +void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr& inputBlob, + std::vector& dependencies) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::prepare_input"); + auto inputLayoutItr = m_graph->GetInputLayouts().find(inputName); + if (inputLayoutItr == m_graph->GetInputLayouts().end()) { IE_THROW() << "Input name mismatch."; } - auto inputLayout = m_graph->GetInputLayouts().at(inputName); - auto is_same_buffer = [&](const Blob& blob, cldnn::memory::ptr memory) -> bool { - const std::string str_not_allocated("Input data was not allocated."); - cldnn::mem_lock ptr{memory, m_graph->GetNetwork()->get_stream()}; - const uint8_t* blob_ptr = blob.cbuffer().as(); - const uint8_t* mem_ptr = ptr.data(); - if (blob_ptr == nullptr || mem_ptr == nullptr) { - IE_THROW() << str_not_allocated; - } - return (blob_ptr == mem_ptr) && (blob.byteSize() == memory->size()); - }; - - cldnn::primitive_id internalName = "parameter:" + inputName; - cldnn::memory::ptr memory = inputsMemory.at(inputName); - auto& stream = m_graph->GetNetwork()->get_stream(); + Blob::Ptr reqBlob = _deviceInputs.at(inputName); auto _nw_ptr = m_graph->GetNetwork(); - auto prec = inputBlob.getTensorDesc().getPrecision(); - - if (inputBlob.is()) { - // no need to check for reuse - _nw_ptr->set_input_data(internalName, memory); - } else if (prec == Precision::I16 || prec == Precision::U16) { - // clDNN doesn't support I16 input precision, so we always have to convert input data to fp32 precision - cldnn::memory::ptr fp32_mem = inputsMemory.at(inputName+fp32_suffix); - cldnn::mem_lock ptr {fp32_mem, stream}; - if (prec == Precision::I16) { - copyToFloat(ptr.data(), &inputBlob); - } else { - copyToFloat(ptr.data(), &inputBlob); - } - - _nw_ptr->set_input_data(internalName, fp32_mem); - } else if 
(is_same_buffer(inputBlob, memory)) { - // If input memory was allocated by cldnn engine and wasn't overwritten by user set_input_data method won't copy input data. - switch (prec) { - case Precision::FP32: - case Precision::FP16: - case Precision::I8: - case Precision::U8: - case Precision::BOOL: - case Precision::I32: - case Precision::I64: { - _nw_ptr->set_input_data(internalName, memory); - break; + cldnn::primitive_id internalName = "parameter:" + inputName; + const auto& prec = inputBlob->getTensorDesc().getPrecision(); + auto remote_ptr = inputBlob->as(); + auto& stream = m_graph->GetNetwork()->get_stream(); + bool is_dev_input = remote_ptr != nullptr; + switch (prec) { + case Precision::FP32: + case Precision::FP16: + case Precision::I8: + case Precision::U8: + case Precision::BOOL: + case Precision::I16: + case Precision::U16: + case Precision::I32: + case Precision::I64: { + auto impl = getBlobImpl(is_dev_input ? + remote_ptr : + reqBlob->as()); + if (!impl->is_allocated()) { + IE_THROW() << str_input_not_allocated; } - default: - IE_THROW() << "Unsupported input precision " << prec; + auto inputMem = impl->getMemory(); + + if (!is_dev_input) { + if (prec == Precision::I16 || prec == Precision::U16) { + // clDNN doesn't support I16 input precision, + // so have to convert input data to fp32 precision + cldnn::mem_lock ptr{ inputMem, stream }; + if (prec == Precision::I16) { + copyToFloat(ptr.data(), inputBlob.get()); + } else { + copyToFloat(ptr.data(), inputBlob.get()); + } + } else { + auto src_lock = inputBlob->cbuffer(); + auto ev = inputMem->copy_from(stream, src_lock.as()); + dependencies.push_back(ev); + } + } + _nw_ptr->set_input_data(internalName, inputMem); + break; } - } else { - // Otherwise, we have to attach to user memory and then copy the data. - copyInputData(_nw_ptr, inputName, inputLayout, inputBlob); + default: + IE_THROW() << "Unsupported input precision " << prec; } } -void CLDNNInferRequest::PrepareInputDyn(const cldnn::primitive_id &inputName, const Blob &inputBlob) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::PrepareInputDyn"); - // now try to get execution results - for (unsigned nb = 0; nb < m_graph->GetNetworksCount(); nb++) { - unsigned int mask = 1 << nb; - - if (m_curBatch & mask) { - auto inputLayout = m_graph->GetInputLayouts().at(inputName); - inputLayout.size.batch[0] = mask; - copyInputData(m_graph->GetNetwork(nb), inputName, inputLayout, inputBlob, &batchInputs[inputName][nb]); - } +void CLDNNInferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::Ptr& outputBlob) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::prepare_output"); + Blob::Ptr reqBlob = _deviceOutputs.at(outputName); + cldnn::primitive_id internalName = outputsMap[outputName]; + auto _nw_ptr = m_graph->GetNetwork(); + auto remote_ptr = outputBlob->as(); + auto output_blob_ptr = (reqBlob != outputBlob && remote_ptr != nullptr) + ? 
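prepare_input() above routes I16 and U16 blobs through copyToFloat because clDNN does not accept 16-bit integer input precision, widening each element into the FP32 staging memory before set_input_data. A standalone sketch of that conversion step (buffer contents are illustrative):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Element-wise widening of a 16-bit integer buffer into float, as copyToFloat does.
template <typename T>
void copy_to_float(float* dst, const T* src, size_t count) {
    for (size_t i = 0; i < count; ++i)
        dst[i] = static_cast<float>(src[i]);
}

int main() {
    std::vector<int16_t> i16_input = {-3, 0, 7, 32767};
    std::vector<float> fp32_staging(i16_input.size());
    copy_to_float(fp32_staging.data(), i16_input.data(), i16_input.size());
    for (float v : fp32_staging)
        std::printf("%.1f ", v);  // -3.0 0.0 7.0 32767.0
    std::printf("\n");
    return 0;
}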
remote_ptr + : reqBlob->as(); + auto impl = getBlobImpl(output_blob_ptr); + if (!impl->is_allocated()) { + IE_THROW(NotAllocated) << str_output_not_allocated; } + auto outputMem = impl->getMemory(); + _nw_ptr->set_output_memory(internalName, outputMem); +} + +InferenceEngine::Blob::Ptr CLDNNInferRequest::create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout) { + auto blobPtr = std::make_shared(m_graph->GetContext(), m_graph->GetNetwork()->get_stream(), desc, layout); + getBlobImpl(blobPtr.get())->allocate(); + return blobPtr; } } // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.h b/inference-engine/src/cldnn_engine/cldnn_infer_request.h index a988438e8d6..43a40eea1bc 100644 --- a/inference-engine/src/cldnn_engine/cldnn_infer_request.h +++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.h @@ -23,6 +23,7 @@ class CLDNNExecNetwork; class CLDNNInferRequest : public InferenceEngine::IInferRequestInternal { public: + using Ptr = std::shared_ptr; // make sure all blobs and cldnn::memory objects // are in place and valid void checkBlobs() override; @@ -45,8 +46,9 @@ public: void EnableProfiling() { m_useProfiling = true; } void EnableStreams() { m_useStreams = true; } -protected: - std::map inputsMemory; +private: + InferenceEngine::BlobMap _deviceOutputs; + std::map inputsMap; std::map outputsMap; bool m_useProfiling; @@ -58,24 +60,25 @@ protected: std::map> batchOutputs; InferenceEngine::IStreamsExecutor* streamExecutor = nullptr; - InferenceEngine::Blob::Ptr createInputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr); - InferenceEngine::Blob::Ptr createOutputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr); - void copyOutputData(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr); - void copyInputData(std::shared_ptr network, const cldnn::primitive_id &inputName, - const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob, - buf_info* bi = nullptr); + void prepare_input(const cldnn::primitive_id &inputName, InferenceEngine::Blob::Ptr &inputBlob, + std::vector& dependencies); + void prepare_output(const cldnn::primitive_id& outputName, InferenceEngine::Blob::Ptr& outputBlob); - void input_attach(cldnn::primitive_id name, cldnn::memory::ptr inputMem); - void input_alloc(cldnn::primitive_id name, const cldnn::layout& layout); - void AllocateInputs(); - void AllocateOutputs(); - void AllocateInputsDyn(); - void AllocateOutputsDyn(); - void execAndParse(); - void execAndParseDyn(); + InferenceEngine::Blob::Ptr create_input_host_blob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr); + InferenceEngine::Blob::Ptr create_output_host_blob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr); + InferenceEngine::Blob::Ptr create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout); - void PrepareInput(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob); - void PrepareInputDyn(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob); + void copy_output_data(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr); + void copy_input_data(std::shared_ptr network, const cldnn::primitive_id &inputName, + const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob, + buf_info* bi = nullptr); + + void allocate_inputs(); + void allocate_outputs(); + void 
allocate_inputs_dynamic(); + void allocate_outputs_dynamic(); + void exec_and_parse(const std::vector& dependencies); + void exec_and_parse_dynamic(); }; }; // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp b/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp index 81a055a4a09..0c0ddf7e637 100644 --- a/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp +++ b/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp @@ -204,6 +204,7 @@ REGISTER_FACTORY(v5, Loop); // ------------------------------ Supported v6 ops ------------------------------ // REGISTER_FACTORY(v6, CTCGreedyDecoderSeqLen); REGISTER_FACTORY(v6, MVN); +REGISTER_FACTORY(v6, GatherElements); // ------------------------------ Supported v7 ops ------------------------------ // REGISTER_FACTORY(v7, Gather); diff --git a/inference-engine/src/cldnn_engine/cldnn_program.cpp b/inference-engine/src/cldnn_engine/cldnn_program.cpp index 275aeca31ca..7386501f0b1 100644 --- a/inference-engine/src/cldnn_engine/cldnn_program.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_program.cpp @@ -6,7 +6,6 @@ #include "ngraph/ops.hpp" #include "ngraph_ops/nms_ie_internal.hpp" #include "cldnn_itt.h" -#include "cldnn/runtime/debug_configuration.hpp" using namespace InferenceEngine; using namespace InferenceEngine::details; @@ -178,16 +177,11 @@ std::shared_ptr Program::BuildProgram(const std::vectordump_graphs.empty()) { - options.set_option(cldnn::build_option::graph_dumps_dir(debug_config->dump_graphs)); - } - options.set_option(cldnn::build_option::optimize_data(true)); options.set_option(cldnn::build_option::tuning_config(m_config.tuningConfig)); @@ -199,7 +193,7 @@ std::shared_ptr Program::BuildProgram(const std::vector(*m_engine, *m_topology, options); + auto program = cldnn::program::build_program(*m_engine, *m_topology, options); CleanupBuild(); return program; diff --git a/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp b/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp index ce52a5eea07..34c3ae30d29 100644 --- a/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp @@ -81,22 +81,13 @@ bool CLDNNRemoteBlobImpl::is_locked() const noexcept { return lockedHolder != nullptr; } -void CLDNNRemoteBlobImpl::allocate_if_needed() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNRemoteBlobImpl::AllocateIfNeeded"); - auto _impl = getContextImpl(m_context.lock()); - _impl->acquire_lock(); - - if (m_memObject == nullptr) { - allocate(); - } - - _impl->release_lock(); -} - void CLDNNRemoteBlobImpl::allocate() noexcept { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNRemoteBlobImpl::Allocate"); assert(m_memObject == nullptr); - std::shared_ptr eng = getContextImpl(m_context.lock())->GetEngine(); + auto _impl = getContextImpl(m_context.lock()); + _impl->acquire_lock(); + std::shared_ptr eng = _impl->GetEngine(); switch (m_mem_type) { case BlobType::BT_BUF_INTERNAL: { @@ -129,6 +120,7 @@ void CLDNNRemoteBlobImpl::allocate() noexcept { default: m_memObject.reset(); } + _impl->release_lock(); } const std::shared_ptr& CLDNNRemoteBlobImpl::getAllocator() const noexcept { @@ -154,7 +146,7 @@ void CLDNNRemoteBlobImpl::lock() const { } void CLDNNRemoteBlobImpl::unlock() const { - lockedHolder.release(); + lockedHolder.reset(); } LockedMemory CLDNNRemoteBlobImpl::buffer() noexcept { diff --git a/inference-engine/src/cldnn_engine/cldnn_remote_context.h 
b/inference-engine/src/cldnn_engine/cldnn_remote_context.h index f6a92e82c48..a68612df041 100644 --- a/inference-engine/src/cldnn_engine/cldnn_remote_context.h +++ b/inference-engine/src/cldnn_engine/cldnn_remote_context.h @@ -44,8 +44,8 @@ public: explicit CLDNNRemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context, cldnn::stream& stream, const cldnn::layout& layout, - cldnn::shared_handle mem, - cldnn::shared_surface surf, + cldnn::shared_handle mem = nullptr, + cldnn::shared_surface surf = 0, uint32_t plane = 0, BlobType mem_type = BT_BUF_INTERNAL); @@ -64,7 +64,6 @@ public: bool is_allocated() const noexcept; bool is_locked() const noexcept; - void allocate_if_needed(); cldnn::memory::ptr getMemory() { return m_memObject; } protected: @@ -99,10 +98,10 @@ public: cldnn::stream& stream, const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, - cldnn::shared_handle mem, - cldnn::shared_surface surf, - uint32_t plane, - CLDNNRemoteBlobImpl::BlobType mem_type) + cldnn::shared_handle mem = nullptr, + cldnn::shared_surface surf = 0, + uint32_t plane = 0, + CLDNNRemoteBlobImpl::BlobType mem_type = CLDNNRemoteBlobImpl::BlobType::BT_BUF_INTERNAL) : _impl(context, stream, layout, mem, surf, plane, mem_type) , TpublicAPI(desc) {} @@ -184,7 +183,7 @@ public: * @brief Maps handle to heap memory accessible by any memory manipulation routines. * @return Generic pointer to memory */ - void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { return nullptr; }; + void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { return handle; }; /** * @brief Unmaps memory by handle with multiple sequential mappings of the same handle. * The multiple sequential mappings of the same handle are suppose to get the same diff --git a/inference-engine/src/cldnn_engine/ops/concat.cpp b/inference-engine/src/cldnn_engine/ops/concat.cpp index 9d37f959f03..453e9996530 100644 --- a/inference-engine/src/cldnn_engine/ops/concat.cpp +++ b/inference-engine/src/cldnn_engine/ops/concat.cpp @@ -12,14 +12,14 @@ namespace CLDNNPlugin { static cldnn::concatenation::concatenation_axis GetConcatAxis(int32_t axis, size_t rank) { - if (axis >= rank) + unsigned cldnn_axis = axis >= 0 ? axis : axis + static_cast(rank); + if (cldnn_axis >= rank) IE_THROW() << "Concatenation axis exceeds number of dimensions"; // Difference in dimension ordering between IE and clDNN, // reverse spatial dimensions after batch and feature. 
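The concat change above, and the new gather_elements.cpp further below, use the same IE-to-clDNN axis remapping: negative axes are normalized first, batch and feature keep indices 0 and 1, and the spatial axes are reversed to match bfyx/bfzyx ordering. A standalone sketch of the mapping with a few worked values:

#include <algorithm>
#include <cstdio>

// Same arithmetic as GetConcatAxis / GetGatherAxis: reverse the spatial dims
// after batch and feature, padding the rank up to clDNN's 4-dim minimum.
unsigned remap_axis(int axis, unsigned rank) {
    if (axis < 0)
        axis += static_cast<int>(rank);          // normalize negative axes
    unsigned cldnn_axis = static_cast<unsigned>(axis);
    if (cldnn_axis >= 2) {
        unsigned spatial_axis = cldnn_axis - 2;
        unsigned spatial_size = std::max(rank, 4u) - 2;
        cldnn_axis = spatial_size - spatial_axis - 1 + 2;
    }
    return cldnn_axis;
}

int main() {
    // rank-4 NCHW: H (axis 2) -> index 3 (along_y), W (axis 3) -> index 2 (along_x)
    std::printf("rank 4: axis 2 -> %u, axis 3 -> %u\n", remap_axis(2, 4), remap_axis(3, 4));
    // rank-5 NCDHW: D -> along_z, H -> along_y, W -> along_x
    std::printf("rank 5: axis 2 -> %u, axis 3 -> %u, axis 4 -> %u\n",
                remap_axis(2, 5), remap_axis(3, 5), remap_axis(4, 5));
    // negative axis: -1 on rank 4 is W, which also maps to index 2
    std::printf("rank 4: axis -1 -> %u\n", remap_axis(-1, 4));
    return 0;
}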
- unsigned cldnn_axis = axis; - if (axis >= 2) { - auto spatial_axis = axis - 2; + if (cldnn_axis >= 2) { + auto spatial_axis = cldnn_axis - 2; // Default and minimum number of dimensions is 4 auto spatial_size = std::max(rank, 4) - 2; cldnn_axis = spatial_size - spatial_axis - 1 + 2; diff --git a/inference-engine/src/cldnn_engine/ops/gather_elements.cpp b/inference-engine/src/cldnn_engine/ops/gather_elements.cpp new file mode 100644 index 00000000000..d6138280750 --- /dev/null +++ b/inference-engine/src/cldnn_engine/ops/gather_elements.cpp @@ -0,0 +1,66 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cldnn_program.h" +#include "cldnn_common_utils.h" + +#include "ngraph/op/gather_elements.hpp" +#include "ngraph/op/constant.hpp" + +#include "cldnn/primitives/gather_elements.hpp" + +namespace CLDNNPlugin { + +static cldnn::gather_elements::gather_elements_axis GetGatherAxis(int axis, unsigned rank) { + if (axis < 0) + axis += rank; + if (axis < 0 || axis >= rank) + IE_THROW() << "GatherElements axis is not correspond to number of dimensions"; + + // Difference in dimension ordering between IE and clDNN, + // reverse spatial dimensions after batch and feature. + unsigned cldnn_axis = axis; + if (axis >= 2) { + auto spatial_axis = axis - 2; + // Default and minimum number of dimensions is 4 + auto spatial_size = std::max(rank, 4u) - 2; + cldnn_axis = spatial_size - spatial_axis - 1 + 2; + } + + switch (cldnn_axis) { + case 0: return cldnn::gather_elements::gather_elements_axis::along_b; + case 1: return cldnn::gather_elements::gather_elements_axis::along_f; + case 2: return cldnn::gather_elements::gather_elements_axis::along_x; + case 3: return cldnn::gather_elements::gather_elements_axis::along_y; + case 4: return cldnn::gather_elements::gather_elements_axis::along_z; + case 5: return cldnn::gather_elements::gather_elements_axis::along_w; + default: IE_THROW() << "Unsupported GatherElements axis: " << axis; + } + return cldnn::gather_elements::gather_elements_axis::along_f; // shouldn't get here +} + +void CreateGatherElementsOp(Program& p, const std::shared_ptr& op) { + p.ValidateInputs(op, {2}); + auto inputPrimitives = p.GetInputPrimitiveIDs(op); + std::string layerName = layer_type_name_ID(op); + + size_t rank = op->get_input_shape(0).size(); + int32_t axis = static_cast(op->get_axis()); + + auto outLayout = DefaultFormatForDims(op->get_output_shape(0).size()); + + auto primitive = cldnn::gather_elements(layerName, + inputPrimitives[0], + inputPrimitives[1], + outLayout, + CldnnTensorFromIEDims(op->get_output_shape(0)), + GetGatherAxis(axis, rank)); + + p.AddPrimitive(primitive); + p.AddPrimitiveToProfiler(op); +} + +REGISTER_FACTORY_IMPL(v6, GatherElements); + +} // namespace CLDNNPlugin diff --git a/inference-engine/src/gna_plugin/backend/dnn.hpp b/inference-engine/src/gna_plugin/backend/dnn.hpp index 4cb0b74a54b..a005468c118 100644 --- a/inference-engine/src/gna_plugin/backend/dnn.hpp +++ b/inference-engine/src/gna_plugin/backend/dnn.hpp @@ -58,7 +58,8 @@ void AdvanceCnnOperationIfAllApplied(const std::vector& c template void AdvancePwlOperationIfAllApplied(const std::vector& component, int i, T*& operation) { - if (i == component.size() - 1 || (component[i + 1].operation != kDnnMaxPoolOp)) { + if (i == component.size() - 1 || ((component[i + 1].operation != kDnnMaxPoolOp) + && (component[i + 1].operation != kDnnPiecewiselinearOp))) { operation++; } } diff --git a/inference-engine/src/gna_plugin/backend/dnn_types.h 
b/inference-engine/src/gna_plugin/backend/dnn_types.h index d08d9346d35..0b00b41ec83 100644 --- a/inference-engine/src/gna_plugin/backend/dnn_types.h +++ b/inference-engine/src/gna_plugin/backend/dnn_types.h @@ -227,7 +227,7 @@ OvGnaType OvGnaTypeIntFromBytes(T bytesPerElement) { return r->second; } -static std::string OvGnaTypeToString(OvGnaType type) { +inline std::string OvGnaTypeToString(OvGnaType type) { static const std::map typeToString = { {OvGnaTypeInt8, "OvGnaTypeInt8"}, {OvGnaTypeInt16, "OvGnaTypeInt16"}, @@ -241,7 +241,7 @@ static std::string OvGnaTypeToString(OvGnaType type) { return r->second; } -static std::string OvGnaModeToString(OvGnaMode mode) { +inline std::string OvGnaModeToString(OvGnaMode mode) { static const std::map modeToString = { {OvGnaModeDefault, "OvGnaModeDefault"}, {OvGnaModeDisabled, "OvGnaModeDisabled"}, diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp index 90af0451929..6a3af8e428b 100644 --- a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp +++ b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp @@ -24,6 +24,10 @@ constexpr uint32_t noOfInputsLowPrecDivisor = 16; constexpr uint32_t affineMaxBatchSize = 8; +constexpr uint32_t maxPoolMaxWindowSize = 6; + +constexpr uint32_t copyMaxGrouping = 8; + namespace Cnn2D { struct RangeLimit { uint32_t min; @@ -87,6 +91,8 @@ class Validator { static void ThrowIfNotEmpty(const std::string prefix, const std::string error); public: + Validator() = default; + void ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t strideH, const uint32_t strideW, OvGnaType inPrecision) const; diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index 3c91f18dc3b..bb3451c0aa7 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -63,6 +63,7 @@ #include "transformations/swap_input_matmul_gna.hpp" #include "transformations/convert_matmul_to_pointwise_convolution.hpp" #include "transformations/split_convolution_with_large_buffer_size.hpp" +#include "transformations/decompose_2d_conv.hpp" #include "transformations/convert_padded2valid_conv.hpp" #include @@ -673,6 +674,11 @@ void GNAPlugin::AddDebugProperties(const InferenceEngine::CNNLayerPtr layer, void GNAPlugin::LoadNetwork(CNNNetwork & _network) { OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "LoadNetwork"); std::shared_ptr convertedNetwork; + + if (!gnaFlags->sw_fp32) { + InitGNADevice(); + } + if (_network.getFunction()) { CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network); const auto& graph = clonedNetwork.getFunction(); @@ -682,6 +688,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + if (config.gnaCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) { + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } // TODO enable this transformation for networks with convolutions if (!ngraph::op::util::has_op_with_type(graph)) { manager.register_pass(); @@ -870,15 +881,16 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { // fill in extra storage with memory layers graphCompiler.fillMemoryConnections(memoryPairs); - if (!graphCompiler.memory_connection.empty()) { + if 
(!graphCompiler.memory_connection.empty() && gnaFlags->gna_lib_async_threads_num != 1) { + // TODO: check if updating the number of threads is needed for sw_fp32 gnaFlags->gna_lib_async_threads_num = 1; + if (!gnaFlags->sw_fp32) + InitGNADevice(); } if (gnaFlags->sw_fp32) { gnamem.reset(new gna_memory_type(memory::make_polymorph>())); graphCompiler.setGNAMemoryPtr(gnamem); - } else { - InitGNADevice(); } // keep inputs information and create input primitives diff --git a/inference-engine/src/gna_plugin/gna_plugin_config.cpp b/inference-engine/src/gna_plugin/gna_plugin_config.cpp index e18847e851c..f5e28e10aed 100644 --- a/inference-engine/src/gna_plugin/gna_plugin_config.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin_config.cpp @@ -90,8 +90,8 @@ void Config::UpdateFromMap(const std::map& config) { } } auto scale_factor = InferenceEngine::CNNLayer::ie_parse_float(value); - if (fp32eq(scale_factor, 0.0f)) { - THROW_GNA_EXCEPTION << "input scale factor of 0.0f not supported"; + if (fp32eq(scale_factor, 0.0f) || std::isinf(scale_factor)) { + THROW_GNA_EXCEPTION << "input scale factor of 0.0f or +-inf not supported"; } // missing scale factors are set to be 1.0f if (inputScaleFactors.size() <= input_index) { diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index f4e5fc7a931..fb7a673ca1b 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -2173,7 +2173,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { // Find all output layers connected to FQ auto nextLayers = CNNNetGetAllNextLayersSkipCertain(*fqLayer, -1, donotSkip); if (nextLayers.empty()) { - return; + continue; } if (isFQFuseAllowed) { diff --git a/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.cpp b/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.cpp index 1701993f387..82f8ccc5ead 100644 --- a/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.cpp +++ b/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.cpp @@ -14,72 +14,26 @@ #include #include #include +#include "utils/transformation_helper.hpp" using namespace GNAPluginNS; NGRAPH_RTTI_DEFINITION(ConvertPadded2ValidConv, "ConvertPadded2ValidConv", 0); -struct ConvData { - size_t input_height; - size_t input_width; - size_t input_channel_count; - size_t filter_count; - size_t pads_begin_width; - size_t pads_begin_height; - size_t pads_end_width; - size_t pads_end_height; - ngraph::op::PadType padding_type; - ngraph::element::Type element_type; -}; - -static bool VerifyAndGetConvParams(std::shared_ptr conv, ConvData& conv_data) { +static bool VerifyAndGetConvData(std::shared_ptr conv, ConvData& conv_data) { const auto& input = conv->input_value(0); - // We support only 2D conv batch 1 - if (conv->get_dilations().size() != 2 || - conv->get_strides().size() != 2 || - input.get_shape()[0] != 1) { + // We support only batch 1 + if (input.get_shape()[0] != 1) { return false; } - conv_data.padding_type = conv->get_auto_pad(); - conv_data.input_channel_count = conv->input_value(0).get_shape()[1]; - conv_data.input_height = conv->input_value(0).get_shape()[2]; - conv_data.input_width = conv->input_value(0).get_shape()[3]; - conv_data.filter_count = conv->input_value(1).get_shape()[0]; - conv_data.pads_begin_height = conv->get_pads_begin()[0]; - conv_data.pads_begin_width = conv->get_pads_begin()[1]; 
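The gna_plugin_config.cpp change above tightens scale-factor validation: besides an exact zero, a parsed value of +/-inf is now rejected as well. A minimal sketch of that check (the fp32eq tolerance used here is an assumption, not the plugin's exact helper):

#include <cmath>
#include <cstdio>
#include <stdexcept>
#include <string>

float parse_scale_factor(const std::string& value) {
    float sf = std::stof(value);  // std::stof accepts "inf"/"infinity" and returns +/-INFINITY
    auto fp32eq = [](float a, float b) { return std::fabs(a - b) <= 1e-7f; };
    if (fp32eq(sf, 0.0f) || std::isinf(sf))
        throw std::invalid_argument("input scale factor of 0.0f or +-inf not supported");
    return sf;
}

int main() {
    std::printf("%.2f\n", parse_scale_factor("2.50"));   // accepted
    try {
        parse_scale_factor("inf");                        // rejected
    } catch (const std::invalid_argument& e) {
        std::printf("%s\n", e.what());
    }
    return 0;
}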
- conv_data.pads_end_height = conv->get_pads_end()[0]; - conv_data.pads_end_width = conv->get_pads_end()[1]; - conv_data.element_type = conv->get_element_type(); + GetConvData(conv, conv_data); return conv_data.pads_begin_height || conv_data.pads_end_height || conv_data.pads_begin_width || conv_data.pads_end_width; } -static bool TransposeOrderMatches(std::shared_ptr transpose, std::vector order) { - if (!transpose) - return false; - const ngraph::Output& transpose_order = transpose->input_value(1); - auto transpose_order_dim = transpose_order.get_shape().size(); - - if (transpose_order_dim != 1 || transpose_order.get_shape()[0] != order.size()) - return false; - - auto const_with_order_values = std::dynamic_pointer_cast(transpose_order.get_node_shared_ptr()); - if (!const_with_order_values) - return false; - - const auto data = const_with_order_values->cast_vector(); - if (data.empty()) - return false; - - if (!std::equal(order.begin(), order.end(), data.begin())) - return false; - - return true; -} - static bool VerifyBias(std::shared_ptr bias, const size_t& filter_count) { auto add_const = std::dynamic_pointer_cast(bias->input_value(0).get_node_shared_ptr()); @@ -91,16 +45,6 @@ static bool VerifyBias(std::shared_ptr bias, const size_t& return (add_const && shape_size(add_const->get_shape()) == filter_count); } -static std::shared_ptr FlatCrop(ngraph::Output input, size_t offset, size_t size) { - return std::make_shared( - input, // data - ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset}), // begin sice index - ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset + size}), // end slice index - ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)1, (size_t)1}), // strides - std::vector{1, 0}, // begin mask - std::vector{1, 0}); // end mask -} - static void InsertPadding(ngraph::OutputVector& input_rows_to_concat, size_t size, const std::shared_ptr& conv, const std::shared_ptr padding_const, size_t biggest_padding) { @@ -226,7 +170,7 @@ static bool Convert(std::shared_ptr leading_transpose, ConvData conv_data; - if (!VerifyAndGetConvParams(std::dynamic_pointer_cast(conv), conv_data)) + if (!VerifyAndGetConvData(std::dynamic_pointer_cast(conv), conv_data)) return false; // We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) @@ -246,7 +190,7 @@ static bool Convert(std::shared_ptr leading_transpose, return true; } -std::function)> consumers_and_rank(const size_t expected_count, const ngraph::Dimension& expected_rank) { +static std::function)> consumers_and_rank(const size_t expected_count, const ngraph::Dimension& expected_rank) { return [=](ngraph::Output output) -> bool { return ngraph::pattern::consumers_count(expected_count) && ngraph::pattern::rank_equals(expected_rank); }; @@ -263,34 +207,35 @@ ConvertPadded2ValidConv::ConvertPadded2ValidConv() { ngraph::pattern::consumers_count(1)); auto bias = ngraph::pattern::wrap_type({conv, const_input}, ngraph::pattern::consumers_count(1)); - auto fq = ngraph::pattern::wrap_type({bias, const_input, const_input, const_input, const_input}, + auto fq_bias = ngraph::pattern::wrap_type({bias, const_input, const_input, const_input, const_input}, ngraph::pattern::consumers_count(1)); auto max_pool1 = ngraph::pattern::wrap_type({bias}, ngraph::pattern::consumers_count(1)); - auto max_pool2 = ngraph::pattern::wrap_type({fq}, + auto max_pool2 = ngraph::pattern::wrap_type({fq_bias}, ngraph::pattern::consumers_count(1)); auto 
af1 = ngraph::pattern::wrap_type({bias}, ngraph::pattern::consumers_count(1)); auto af2 = ngraph::pattern::wrap_type({fq}, ngraph::pattern::consumers_count(1)); + ngraph::opset7::Sign, ngraph::opset7::Clamp>({fq_bias}, ngraph::pattern::consumers_count(1)); auto af3 = ngraph::pattern::wrap_type({max_pool1}, ngraph::pattern::consumers_count(1)); auto af4 = ngraph::pattern::wrap_type({max_pool2}, ngraph::pattern::consumers_count(1)); - auto transpose_input = std::make_shared(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq, af1, af2, af3, af4}); + auto fq_af = ngraph::pattern::wrap_type({af4, const_input, const_input, const_input, const_input}, + ngraph::pattern::consumers_count(1)); + auto transpose_input = + std::make_shared(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq_bias, af1, af2, af3, af4, fq_af}); auto trailing_transpose = ngraph::pattern::wrap_type({transpose_input, const_input}, consumers_and_rank(1, 4)); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); - auto conv_output = conv->output(0).get_node_shared_ptr(); - IE_ASSERT(conv_output != nullptr); - - auto bias_node = std::dynamic_pointer_cast(conv_output); + auto bias_it = pattern_map.find(bias); + auto bias_node = (bias_it == std::end(pattern_map) ? nullptr : bias_it->second.get_node_shared_ptr()); return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(trailing_transpose).get_node_shared_ptr(), bias_node); diff --git a/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.hpp b/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.hpp index 9d8a0f10477..55bef912b9c 100644 --- a/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.hpp +++ b/inference-engine/src/gna_plugin/transformations/convert_padded2valid_conv.hpp @@ -13,11 +13,11 @@ namespace GNAPluginNS { * wrapped with transposes, to a valid convolution with padding added before the leading transpose, * POT precessed models are supported (fake quantized layers omitted below for clarity): * - * Padding - * | + * Padding + * | * Transpose (NHWC -> NCHW) Transpose (NHWC -> NCHW) * | | - * Convolution with padding Convolution with padding + * Convolution with padding Valid convolution * | | * Broadcast Bias (optional) Broadcast Bias (optional) * | | diff --git a/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.cpp b/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.cpp new file mode 100644 index 00000000000..4b313ce8bb0 --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.cpp @@ -0,0 +1,667 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "transformations/decompose_2d_conv.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include "utils/transformation_helper.hpp" +#include "backend/gna_limitations.hpp" +#include "layers/gna_convolution_layer.hpp" + + +using namespace GNAPluginNS; + +NGRAPH_RTTI_DEFINITION(Decompose2DConv, "Decompose2DConv", 0); +NGRAPH_RTTI_DEFINITION(Decompose2DConvTransposedWithBias, "Decompose2DConvTransposedWithBias", 0); +NGRAPH_RTTI_DEFINITION(Decompose2DConvTransposedWithBiasAF, "Decompose2DConvTransposedWithBiasAF", 0); + +struct GraphData { + std::shared_ptrleading_transpose; + std::shared_ptrfq_conv; + std::shared_ptrconv; + 
std::shared_ptrtrailing_transpose; + std::shared_ptrfq_bias; + std::shared_ptrmax_pool; + std::shared_ptraf; + std::shared_ptrfq_af; + std::shared_ptrlast_op_in_sequence_for_replacement; + std::shared_ptrbias_const; + size_t conv_count; + size_t pool_size_width; + size_t pool_stride_width; + // TODO: currently 2D max pool is not supported + //size_t pool_size_height; + //size_t pool_stride_height; +}; + +static bool VerifyAndGetConvData(std::shared_ptr conv, ConvData& conv_data) { + const auto& input = conv->input_value(0); + const auto& filters = conv->input_value(1); + + // We support only batch == 1 + if (input.get_shape()[0] != 1) { + return false; + } + + size_t filter_height = filters.get_shape()[2]; + size_t filter_width = filters.get_shape()[3]; + + if (filter_width > GNALimitations::copyMaxGrouping || filter_height > GNALimitations::copyMaxGrouping) { + return false; + } + + GetConvData(conv, conv_data); + + IE_ASSERT(conv_data.output_channel_count == conv->get_output_shape(0)[1]); + + return true; +} + +static std::shared_ptr VerifyBiasAndReshapeConst(std::shared_ptr conv_bias, const ConvData& conv_data) { + auto add_const = std::dynamic_pointer_cast(conv_bias->input_value(1).get_node_shared_ptr()); + + if (add_const) { + auto bias_size = shape_size(add_const->get_shape()); + + // The add may be a normal add not conv bias, then we just go further + if (bias_size == conv_data.filter_count) { + return ngraph::op::util::make_try_fold(add_const, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{1, bias_size, 1, 1}), false); + } + } + // Bias size does not match (or dynamic bias), can't decompose such convolution + return nullptr; +} + +static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr max_pool) { + auto pool_filter = max_pool->get_kernel(); + auto pool_strides = max_pool->get_strides(); + + // Check Max Pool padding and limitations + if ((max_pool->get_auto_pad() != ngraph::op::PadType::VALID && + (max_pool->get_auto_pad() != ngraph::op::PadType::EXPLICIT || + max_pool->get_pads_begin() != ngraph::Shape({0, 0}) || max_pool->get_pads_end() != ngraph::Shape({0, 0}))) || + pool_filter.size() != 2 || pool_strides.size() != 2 || + pool_filter[0] > GNALimitations::maxPoolMaxWindowSize) + return false; + + graph_data.pool_size_width = pool_filter[1]; + graph_data.pool_stride_width = pool_strides[1]; + return true; +} + +static size_t CalculateConvCount(const ConvData& conv_data) { + // Check if split of plane due to GNA HW limitations of 768 filter elements is possible + size_t conv_count = 1; + size_t total_factorized_conv_channel_count = (conv_data.input_channel_count * conv_data.filter_height * conv_data.filter_width); + while (total_factorized_conv_channel_count / conv_count > GNALimitations::convFilterMaxSize || + total_factorized_conv_channel_count % conv_count != 0 || conv_data.filter_channel_count % conv_count != 0) + conv_count++; + + return conv_count; +} + +static bool ShouldDecompose(GraphData& graph_data, const ConvData& conv_data) { + // Calculate the number of splits required + graph_data.conv_count = CalculateConvCount(conv_data); + + // Concat (copy) layer limitation allows to split up to a certain limit + // Currently we are able to split only convolutions without pooling in horizontal dimension + if (graph_data.conv_count > GNALimitations::copyMaxGrouping || + ((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1)) + return false; + + // GNA supported features or handled 
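CalculateConvCount() above looks for the smallest number of sub-convolutions such that each part stays within the 768-filter-element GNA limit and both channel counts divide evenly. A standalone sketch of that search with illustrative shapes:

#include <cstddef>
#include <cstdio>

// Mirrors the split search: grow conv_count until the factorized filter volume
// fits the GNA limit and the channel counts split without remainder.
size_t calc_conv_count(size_t in_channels, size_t kernel_h, size_t kernel_w, size_t filter_channels) {
    const size_t conv_filter_max_size = 768;  // the 768-element HW limit mentioned above
    const size_t total = in_channels * kernel_h * kernel_w;
    size_t conv_count = 1;
    while (total / conv_count > conv_filter_max_size ||
           total % conv_count != 0 ||
           filter_channels % conv_count != 0)
        ++conv_count;
    return conv_count;
}

int main() {
    // 64 input channels, 3x3 kernel -> 576 elements, fits in a single convolution
    std::printf("64ch 3x3  -> %zu split(s)\n", calc_conv_count(64, 3, 3, 64));
    // 128 input channels, 3x3 kernel -> 1152 elements, needs two splits of 576
    std::printf("128ch 3x3 -> %zu split(s)\n", calc_conv_count(128, 3, 3, 128));
    return 0;
}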
otherwise - there is no need to decompose such convolution + if (graph_data.conv_count == 1 && (((conv_data.input_height == 1 || conv_data.input_width == 1) && + conv_data.filter_dilation_width == 1 && conv_data.filter_dilation_height == 1) || + GNAConvolutionLayer::isMappableFrom2DTo1D(conv_data.input_height, conv_data.input_width, conv_data.filter_width, conv_data.filter_stride_width))) + return false; + + return true; +} + +static std::vector> Split2DConvFilters(std::shared_ptr& filters, + const bool& vertical_permute, const bool& horizontal_permute, const size_t& split_channels) { + + if (!horizontal_permute && !vertical_permute && split_channels == 1) + return {filters}; + + std::vector > result; + ngraph::Shape reshape_shape; + auto flat_filters = filters->outputs(); + const auto filter_shape = filters->get_output_shape(0); + IE_ASSERT(filter_shape.size() == 4); + + if (split_channels > 1) { + const auto axis_node = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1}); + const auto split = std::make_shared(filters, axis_node, split_channels); + flat_filters = split->outputs(); + } + + for (size_t split_index = 0; split_index < split_channels; split_index++) { + ngraph::Output& flat_filter = flat_filters[split_index]; + if (horizontal_permute && !vertical_permute) { + result.push_back(std::make_shared(flat_filter, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{0, 1, 3, 2}))); + } else { + result.push_back(flat_filter.get_node_shared_ptr()); + } + } + + if (vertical_permute && horizontal_permute) { + reshape_shape = ngraph::Shape{filter_shape[0], filter_shape[1] * filter_shape[2] * filter_shape[3] / split_channels, 1, 1}; + } else if (vertical_permute && !horizontal_permute) { + reshape_shape = ngraph::Shape{filter_shape[0], filter_shape[1] * filter_shape[2] / split_channels, 1, filter_shape[3]}; + } else if (!vertical_permute && horizontal_permute) { + reshape_shape = ngraph::Shape{filter_shape[0], filter_shape[1] * filter_shape[3] / split_channels, filter_shape[2], 1}; + } else { + reshape_shape = ngraph::Shape{filter_shape[0], filter_shape[1] / split_channels, filter_shape[2], filter_shape[3]}; + } + + for (auto &new_filter : result) + new_filter = ngraph::op::util::make_try_fold(new_filter, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, reshape_shape), false); + + return result; +} + +static ngraph::OutputVector SplitInput(const GraphData& graph_data, ConvData& conv_data) { + // We need to have proper input shape first + ngraph::OutputVector split_planes; + auto padded_input_plane = std::make_shared(graph_data.leading_transpose->input_value(0), + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, + ngraph::Shape{1, shape_size(graph_data.leading_transpose->input_value(0).get_shape())}), false); + copy_runtime_info(graph_data.conv, padded_input_plane); + + if (graph_data.conv_count > 1) { + // If we have split input plane and convolutions due to GNA limitation - we must sum their results at the end + conv_data.input_channel_count /= graph_data.conv_count; + + auto reshape_before_transpose = std::make_shared(padded_input_plane, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, + {shape_size(padded_input_plane->get_shape()) / graph_data.conv_count, graph_data.conv_count}), false); + + auto transpose_before_channel_wise_split = std::make_shared(reshape_before_transpose, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {1, 
0})->output(0)); + + const auto axis_node = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0}); + const auto split = std::make_shared(transpose_before_channel_wise_split, axis_node, graph_data.conv_count); + split_planes = split->outputs(); + } else { + split_planes.push_back(padded_input_plane); + } + + return split_planes; +} + +static std::vector> SplitFilters(const GraphData& graph_data, ConvData& conv_data) { + // If the input plane exceeds GNA limits and we have split into several convolutions, then we need to split filter data as well; + // we also need to take filter height and potential dilation into account when modifying the filters + + // Take account of fake quantize when getting filter values + auto filter_values = std::dynamic_pointer_cast(graph_data.fq_conv == nullptr ? + graph_data.conv->input_value(1).get_node_shared_ptr() : graph_data.fq_conv->input_value(0).get_node_shared_ptr()); + bool vertical_permute = (conv_data.filter_height > 1); + bool horizontal_permute = (conv_data.filter_dilation_width > 1); + std::vector> h_1_filters{}; + + h_1_filters = Split2DConvFilters(filter_values, vertical_permute, horizontal_permute, graph_data.conv_count); + + for (auto filter : h_1_filters) + copy_runtime_info(graph_data.conv, filter); + + return h_1_filters; +} + +static void TransformInput(const GraphData& graph_data, const ConvData& conv_data, ngraph::Output& split_input_plane) { + /* + * Padded row - NHWC order + * | + * Split in vertical dim (filter height) + * / | \ + * Concat + * | + * Transpose + */ + + // First we need to prepare flat (height = 1) slices of input data proper for flattened (height = 1) filters created later on; + // the input datat is overlapping (duplicated) + ngraph::OutputVector dilated_input_planes; + for (size_t filter_height = 0; filter_height < conv_data.filter_height; filter_height++) { + size_t offset; + + if (conv_data.filter_stride_height > 1) { + // Prepare strided slices of input data + for (size_t output_height = 0; output_height < conv_data.output_height; output_height++) { + offset = (filter_height * conv_data.filter_dilation_height + output_height * conv_data.filter_stride_height) * + conv_data.input_width * conv_data.input_channel_count; + auto slice = FlatCrop(split_input_plane, offset, conv_data.input_width * conv_data.input_channel_count); + copy_runtime_info(graph_data.conv, slice); + dilated_input_planes.push_back(slice); + } + } else { + offset = filter_height * conv_data.filter_dilation_height * conv_data.input_width * conv_data.input_channel_count; + auto slice = FlatCrop(split_input_plane, offset, conv_data.input_width * conv_data.input_channel_count * conv_data.output_height); + copy_runtime_info(graph_data.conv, slice); + dilated_input_planes.push_back(slice); + } + } + + // Interleaving dilated input planes + std::shared_ptr dilated_chunks_concat = std::make_shared(dilated_input_planes, 0); + + // Additional reshape is required for strided slices of input intended for each filter row + if (conv_data.filter_stride_height > 1) { + dilated_chunks_concat = std::make_shared(dilated_chunks_concat, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, + {conv_data.filter_height, conv_data.input_width * conv_data.input_channel_count * conv_data.output_height}), false); + } + + auto transposed_dilated_chunks = std::make_shared(dilated_chunks_concat, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {1, 0})->output(0)); + + // Flattening of interleaved input planes + 
auto flattened_dilated_transposed_input = std::make_shared(transposed_dilated_chunks, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, + {(size_t)1, conv_data.input_width * conv_data.input_channel_count * conv_data.output_height * conv_data.filter_height}), false); + + copy_runtime_info(graph_data.conv, {dilated_chunks_concat, flattened_dilated_transposed_input, transposed_dilated_chunks }); + split_input_plane = flattened_dilated_transposed_input; +} + +static void InsertFQLayer(const std::shared_ptr fqLayer, + std::shared_ptr lastNode) { + if (fqLayer != nullptr) { + lastNode = fqLayer->clone_with_new_inputs({lastNode, + fqLayer->input_value(1), fqLayer->input_value(2), + fqLayer->input_value(3), fqLayer->input_value(4)}); + ngraph::copy_runtime_info(fqLayer, lastNode); + } +} + +// Valid 1D (decomposed 2D) convolution wrapped with transposes NHWC => NCHW => conv => NCHW => NHWC +static std::shared_ptr Create1DConv(const GraphData& graph_data, const ConvData& conv_data, const ngraph::Output& input, + std::shared_ptr filters, const size_t conv_index, const size_t h_index) { + // Transpose NHWC => NCHW + std::shared_ptr nchw_input = std::make_shared(input, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 3, 1, 2})->output(0)); + + // Fake quantize + InsertFQLayer(graph_data.fq_conv, filters); + + // 1D Convolution + auto conv = std::make_shared(nchw_input, filters, + ngraph::Strides{1, conv_data.filter_stride_width}, ngraph::CoordinateDiff{0, 0}, ngraph::CoordinateDiff{0, 0}, + ngraph::Strides{1, 1}, ngraph::op::PadType::VALID); + std::string conv_name = graph_data.conv->get_friendly_name() + "_H_" + std::to_string(h_index) + "_CH_" + std::to_string(0); + conv->set_friendly_name(conv_name); + + // Bias & fake quantize + std::shared_ptr last_conv_block_op = conv; + if (graph_data.bias_const && conv_index == 0) { + last_conv_block_op = std::make_shared(conv, graph_data.bias_const); + copy_runtime_info(graph_data.conv, last_conv_block_op); + InsertFQLayer(graph_data.fq_bias, last_conv_block_op); + } + + // Max pooling + if ((graph_data.max_pool && graph_data.pool_size_width > 1) || graph_data.pool_stride_width > 1) { + last_conv_block_op = std::make_shared(last_conv_block_op, + ngraph::Strides{1, graph_data.pool_stride_width}, ngraph::Shape{0, 0}, ngraph::Shape{0, 0}, + ngraph::Shape{1, graph_data.pool_size_width}, graph_data.max_pool->get_rounding_type(), ngraph::op::PadType::VALID); + } + + // Activation function & fake quantize + if (graph_data.af && graph_data.conv_count == 1) { + last_conv_block_op = graph_data.af->copy_with_new_inputs({last_conv_block_op}); + copy_runtime_info(conv, last_conv_block_op); + InsertFQLayer(graph_data.fq_af, last_conv_block_op); + } + + // Transpose NCHW => NHWC + auto nhwc_output = std::make_shared(last_conv_block_op, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 2, 3, 1})->output(0)); + copy_runtime_info(graph_data.conv, {nchw_input, conv, nhwc_output}); + return nhwc_output; +} + +static std::shared_ptr CreateDecomposedConv(const GraphData& graph_data, ConvData& conv_data, + ngraph::Output& reduced_input_plane, const std::vector>& h_1_filters, const size_t conv_index) { + ngraph::OutputVector result_chunks; + std::shared_ptr last_op; + bool horizontal_permute = (conv_data.filter_dilation_width > 1); + size_t h_1_filter_channel_count = (conv_data.input_channel_count * conv_data.filter_height); + + for (size_t output_height = 0; output_height < conv_data.output_height; 
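TransformInput() above slices the flattened NHWC plane once per (filter row, output row) pair, using offset = (filter_row * dilation_h + output_row * stride_h) * input_width * input_channels and a chunk length of one input row. A small sketch that prints those crop ranges for illustrative shapes:

#include <cstddef>
#include <cstdio>

int main() {
    // Illustrative shapes: 7x8 NHWC input, 3 channels, 3-row filter, vertical stride 2.
    const size_t input_width = 8, input_channels = 3;
    const size_t filter_height = 3, dilation_h = 1, stride_h = 2;
    const size_t output_height = 3;  // (7 - 3) / 2 + 1 for a valid convolution

    const size_t row_elems = input_width * input_channels;
    for (size_t filter_row = 0; filter_row < filter_height; ++filter_row) {
        for (size_t out_row = 0; out_row < output_height; ++out_row) {
            const size_t offset = (filter_row * dilation_h + out_row * stride_h) * row_elems;
            // Each crop feeds one flattened (height = 1) filter row, as FlatCrop does above.
            std::printf("filter row %zu, output row %zu -> elements [%zu, %zu)\n",
                        filter_row, out_row, offset, offset + row_elems);
        }
    }
    return 0;
}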
output_height++) {
+        size_t offset = output_height * conv_data.input_width * h_1_filter_channel_count;
+        auto row = (conv_data.output_height == 1) ? reduced_input_plane :
+            FlatCrop(reduced_input_plane, offset, conv_data.input_width * h_1_filter_channel_count);
+        /*
+         *              Padded row
+         *                  |
+         *             ???      ???
+         *                  |
+         *          Split in vertical dim
+         *              /   |   \
+         *                Concat
+         *                  |
+         *               Permute
+         *                  |
+         *        Transpose (NHWC => NCHW)
+         *                  |
+         *       1D Conv (Bias | MaxPooling)
+         *                  |
+         *        Transpose (NCHW => NHWC)
+         */
+        auto nhwc_conv_y_input = row;
+
+        if (horizontal_permute) {
+            // Horizontal split - transform input accordingly
+            ngraph::OutputVector dilated_chunks;
+            std::shared_ptr dilated_chunks_concat = nhwc_conv_y_input.get_node_shared_ptr();
+
+            // When the horizontal stride is > 1 we recompute these parameters instead of reusing the ones from the original convolution,
+            // because the horizontal stride is handled by the newly created 1D convolution and not by the decomposition itself
+            size_t filter_dilation_width = conv_data.filter_width > 1 ? conv_data.filter_dilation_width : 1;
+            size_t output_width = (conv_data.input_width - (conv_data.filter_width + filter_dilation_width - 2));
+
+            if (conv_data.filter_width > 1) {
+                for (size_t filter_width = 0; filter_width < conv_data.filter_width; filter_width++) {
+                    size_t offset = filter_width * conv_data.filter_dilation_width * h_1_filter_channel_count;
+                    auto slice = FlatCrop(row, offset, h_1_filter_channel_count * output_width);
+                    copy_runtime_info(graph_data.conv, slice);
+                    dilated_chunks.push_back(slice);
+                }
+
+                dilated_chunks_concat = std::make_shared(dilated_chunks, 0);
+            }
+
+            auto transposed_dilated_chunks = std::make_shared(dilated_chunks_concat,
+                ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {1, 0})->output(0));
+
+            auto flattened_dilated_conv_input = std::make_shared(transposed_dilated_chunks,
+                ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4},
+                    ngraph::Shape{1, 1, output_width, h_1_filter_channel_count * conv_data.filter_width}), false);
+
+            copy_runtime_info(graph_data.conv, ngraph::NodeVector{flattened_dilated_conv_input, transposed_dilated_chunks, dilated_chunks_concat});
+
+            nhwc_conv_y_input = flattened_dilated_conv_input;
+        } else {
+            // If no horizontal split is done, only a reshape is required before the decomposed convolution
+            nhwc_conv_y_input = std::make_shared(nhwc_conv_y_input,
+                ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4},
+                    ngraph::Shape{1, 1, conv_data.input_width, h_1_filter_channel_count}), false);
+        }
+
+        // Pointwise convolutions
+        // Valid 1D convolution wrapped with transposes NHWC => NCHW => Conv => NCHW => NHWC
+        // Activation function can be fused with convolution only if it isn't split
+        auto nhwc_y_output = Create1DConv(graph_data, conv_data, nhwc_conv_y_input, h_1_filters[conv_index], conv_index, output_height);
+        result_chunks.push_back(nhwc_y_output);
+        last_op = nhwc_y_output;
+    }
+
+    // Horizontal dimension greater than 1
+    if (result_chunks.size() > 1) {
+        // Concat in horizontal dimension
+        // In NHWC layout the index of H is 1
+        auto concatenated_sub_results = std::make_shared(result_chunks, 1);
+        copy_runtime_info(graph_data.conv, concatenated_sub_results);
+        last_op = concatenated_sub_results;
+    }
+    return last_op;
+}
+
+static void Decompose(const GraphData& graph_data, ConvData& conv_data) {
+    std::vector> partial_conv_results;
+
+    // Split input due to GNA filter element count limit
+    auto split_planes =
SplitInput(graph_data, conv_data); + // Split filters due to GNA filter element count limit, 2D convolution shape, or dilations + auto h_1_filters = SplitFilters(graph_data, conv_data); + + // Do transformations in each of the splits created above + for (size_t conv_index = 0; conv_index < graph_data.conv_count; conv_index++) { + ngraph::Output& split_input_plane = split_planes[conv_index]; + + // Input data needs to be prepared before 2D convolution decomposition + if (conv_data.filter_height > 1 || conv_data.filter_stride_height > 1) { + TransformInput(graph_data, conv_data, split_input_plane); + } + + auto flat_conv = CreateDecomposedConv(graph_data, conv_data, split_input_plane, h_1_filters, conv_index); + partial_conv_results.push_back(flat_conv); + } + + std::shared_ptr conv_result = partial_conv_results.front(); + for (size_t i = 1; i < partial_conv_results.size(); i++) { + auto add_result = std::make_shared(partial_conv_results[i], conv_result); + copy_runtime_info(graph_data.conv, add_result); + conv_result = add_result; + } + + // TODO: Max Pool 2D case + //if (graph_data.max_pool && (graph_data.pool_size_height > 1 || graph_data.pool_stride_height > 1)) { + //} + + // Activation function after trailing Transpose NCHW->NHWC + if (graph_data.af && graph_data.conv_count > 1) { + auto af_result = graph_data.af->copy_with_new_inputs({conv_result}); + copy_runtime_info(graph_data.conv, af_result); + conv_result = af_result; + } + // We need to put the same name as before for the Convolution layer, so its output can be used as network result + std::string conv_result_name = graph_data.last_op_in_sequence_for_replacement->get_friendly_name(); + replace_node(graph_data.last_op_in_sequence_for_replacement, conv_result); + conv_result->set_friendly_name(conv_result_name); +} + +static bool Convert(std::shared_ptr leading_transpose, + std::shared_ptr fq_conv, + std::shared_ptr conv, + std::shared_ptr trailing_transpose, + std::shared_ptr bias, + std::shared_ptr fq_bias, + std::shared_ptr max_pool, + std::shared_ptr af, + std::shared_ptr fq_af, + std::shared_ptr last_op_for_replacement) { + + GraphData graph_data{std::dynamic_pointer_cast(leading_transpose), + std::dynamic_pointer_cast(fq_conv), + std::dynamic_pointer_cast(conv), + std::dynamic_pointer_cast(trailing_transpose), + std::dynamic_pointer_cast(fq_bias), + std::dynamic_pointer_cast(max_pool), + std::dynamic_pointer_cast(af), + std::dynamic_pointer_cast(fq_af), + last_op_for_replacement, nullptr, 1, 1, 1}; + ConvData conv_data; + + if (!VerifyAndGetConvData(std::dynamic_pointer_cast(conv), conv_data)) + return false; + + // We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) + // or similar cases, so required network must be in NHWC order like in TF + if (!TransposeOrderMatches(std::dynamic_pointer_cast(leading_transpose), {0, 3, 1, 2})) + return false; + + if (!TransposeOrderMatches(std::dynamic_pointer_cast(trailing_transpose), {0, 2, 3, 1})) + return false; + + if (bias && !(graph_data.bias_const = VerifyBiasAndReshapeConst(std::dynamic_pointer_cast(bias), conv_data))) + return false; + + if (max_pool && !VerifyMaxPool(graph_data, std::dynamic_pointer_cast(max_pool))) + return false; + + if (!ShouldDecompose(graph_data, conv_data)) + return false; + + // All checks applied - now we may start decomposition + Decompose(graph_data, conv_data); + + return true; +} + +static bool VerifyBias(std::shared_ptr conv, std::shared_ptr bias) { + auto add_const = 
std::dynamic_pointer_cast(bias->input_value(1).get_node_shared_ptr());
+
+    if (!add_const) {
+        add_const = std::dynamic_pointer_cast(bias->input_value(0).get_node_shared_ptr());
+    }
+
+    // A bias constant must be found on one of the Add inputs and its element count must match the filter (output channel) count
+    if (add_const) {
+        auto bias_size = shape_size(add_const->get_shape());
+        auto conv_filter_count = conv->input_value(1).get_shape()[0];
+        if (bias_size == conv_filter_count)
+            return true;
+    }
+    return false;
+}
+
+Decompose2DConv::Decompose2DConv() {
+    MATCHER_SCOPE(Decompose2DConv);
+
+    auto const_input = ngraph::pattern::wrap_type();
+    auto leading_transpose = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), const_input},
+        consumers_and_rank(1, 4));
+    auto filters_const = ngraph::pattern::wrap_type(ngraph::pattern::rank_equals(4));
+    auto fq_conv = ngraph::pattern::wrap_type({const_input, const_input, const_input, const_input, const_input},
+        consumers_and_rank(1, 4));
+    auto filters = std::make_shared(ngraph::OutputVector{filters_const, fq_conv});
+    auto conv = ngraph::pattern::wrap_type({leading_transpose, filters},
+        consumers_and_rank(1, 4));
+    auto bias = ngraph::pattern::wrap_type({conv, const_input},
+        ngraph::pattern::consumers_count(1));
+    auto fq_bias = ngraph::pattern::wrap_type({bias, const_input, const_input, const_input, const_input},
+        ngraph::pattern::consumers_count(1));
+    auto max_pool1 = ngraph::pattern::wrap_type({bias},
+        ngraph::pattern::consumers_count(1));
+    auto max_pool2 = ngraph::pattern::wrap_type({fq_bias},
+        ngraph::pattern::consumers_count(1));
+    auto af1 = ngraph::pattern::wrap_type({bias}, ngraph::pattern::consumers_count(1));
+    auto af2 = ngraph::pattern::wrap_type({fq_bias}, ngraph::pattern::consumers_count(1));
+    auto af3 = ngraph::pattern::wrap_type({max_pool1}, ngraph::pattern::consumers_count(1));
+    auto af4 = ngraph::pattern::wrap_type({max_pool2}, ngraph::pattern::consumers_count(1));
+    auto fq_af = ngraph::pattern::wrap_type({af4, const_input, const_input, const_input, const_input},
+        ngraph::pattern::consumers_count(1));
+    auto transpose_input =
+        std::make_shared(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq_bias, af1, af2, af3, af4, fq_af});
+    auto trailing_transpose = ngraph::pattern::wrap_type({transpose_input, const_input},
+        consumers_and_rank(1, 4));
+
+    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
+        const auto& pattern_map = m.get_pattern_value_map();
+        auto fq_conv_it = pattern_map.find(fq_conv);
+        auto fq_conv_node = (fq_conv_it == std::end(pattern_map) ? nullptr : fq_conv_it->second.get_node_shared_ptr());
+        auto bias_it = pattern_map.find(bias);
+        auto bias_node = (bias_it == std::end(pattern_map) ? nullptr : bias_it->second.get_node_shared_ptr());
+        auto fq_bias_it = pattern_map.find(fq_bias);
+        auto fq_bias_node = (fq_bias_it == std::end(pattern_map) ? nullptr : fq_bias_it->second.get_node_shared_ptr());
+        auto fq_af_it = pattern_map.find(fq_af);
+        auto fq_af_node = (fq_af_it == std::end(pattern_map) ? nullptr : fq_af_it->second.get_node_shared_ptr());
+        auto max_pool1_it = pattern_map.find(max_pool1);
+        auto max_pool2_it = pattern_map.find(max_pool2);
+        auto max_pool_node = (max_pool1_it == std::end(pattern_map) ?
+            ((max_pool2_it == std::end(pattern_map) ?
nullptr : max_pool2_it->second.get_node_shared_ptr())) : max_pool1_it->second.get_node_shared_ptr()); + std::shared_ptr af_node = nullptr; + std::vector af_it + {pattern_map.find(af1), pattern_map.find(af2), pattern_map.find(af3), pattern_map.find(af4)}; + + for (auto const& af : af_it) { + if (af != std::end(pattern_map)) { + af_node = af->second.get_node_shared_ptr(); + break; + } + } + + return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), fq_conv_node, pattern_map.at(conv).get_node_shared_ptr(), + pattern_map.at(trailing_transpose).get_node_shared_ptr(), bias_node, fq_bias_node, max_pool_node, af_node, fq_af_node, + pattern_map.at(trailing_transpose).get_node_shared_ptr()); + }; + + auto m = std::make_shared(trailing_transpose, matcher_name); + this->register_matcher(m, callback); +} + +Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias() { + MATCHER_SCOPE(Decompose2DConvTransposedWithBias); + + auto const_input_i64 = ngraph::pattern::wrap_type(ngraph::pattern::type_matches(ngraph::element::i64)); + auto const_input = ngraph::pattern::wrap_type(); + auto leading_transpose = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), const_input_i64}, + consumers_and_rank(1, 4)); + auto conv = ngraph::pattern::wrap_type( + {leading_transpose, ngraph::pattern::wrap_type(ngraph::pattern::rank_equals(4))}, + consumers_and_rank(1, 4)); + auto trailing_transpose = ngraph::pattern::wrap_type({conv, const_input_i64}, + consumers_and_rank(1, 4)); + auto bias = ngraph::pattern::wrap_type({trailing_transpose, const_input}, + ngraph::pattern::consumers_count(1)); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + if (!VerifyBias(pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr())) + return false; + + return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, pattern_map.at(conv).get_node_shared_ptr(), + pattern_map.at(trailing_transpose).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr(), nullptr, nullptr, + nullptr, nullptr, pattern_map.at(bias).get_node_shared_ptr()); + }; + + auto m = std::make_shared(bias, matcher_name); + this->register_matcher(m, callback); +} + +Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF() { + MATCHER_SCOPE(Decompose2DConvTransposedWithBiasAF); + + auto const_input_i64 = ngraph::pattern::wrap_type(ngraph::pattern::type_matches(ngraph::element::i64)); + auto const_input = ngraph::pattern::wrap_type(); + auto leading_transpose = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), const_input_i64}, + consumers_and_rank(1, 4)); + auto conv = ngraph::pattern::wrap_type( + {leading_transpose, ngraph::pattern::wrap_type(ngraph::pattern::rank_equals(4))}, + consumers_and_rank(1, 4)); + auto trailing_transpose = ngraph::pattern::wrap_type({conv, const_input_i64}, + consumers_and_rank(1, 4)); + auto bias = ngraph::pattern::wrap_type({trailing_transpose, const_input}, + ngraph::pattern::consumers_count(1)); + auto af = ngraph::pattern::wrap_type({bias}, + ngraph::pattern::consumers_count(1)); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + if (!VerifyBias(pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr())) + return false; + + return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, 
pattern_map.at(conv).get_node_shared_ptr(),
+            pattern_map.at(trailing_transpose).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr(), nullptr,
+            nullptr, pattern_map.at(af).get_node_shared_ptr(), nullptr, pattern_map.at(af).get_node_shared_ptr());
+    };
+
+    auto m = std::make_shared(af, matcher_name);
+    this->register_matcher(m, callback);
+}
diff --git a/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.hpp b/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.hpp
new file mode 100644
index 00000000000..4fbaf47ff72
--- /dev/null
+++ b/inference-engine/src/gna_plugin/transformations/decompose_2d_conv.hpp
@@ -0,0 +1,80 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+
+namespace GNAPluginNS {
+
+/**
+ * @brief Decompose a 2D convolution, wrapped with transposes,
+ * to a set of valid 1D convolutions with padding added in front of the set:
+ *
+ *                                        Padding
+ *                                           |
+ *   Transpose (NHWC -> NCHW)            Transpose (NHWC -> NCHW)
+ *              |                                 |
+ *   Convolution with padding            Valid convolution
+ *              |                                 |
+ *   Broadcast Bias (optional)           Broadcast Bias (optional)
+ *              |                                 |
+ *   Max Pooling (optional)              Max Pooling (optional)
+ *              |                                 |
+ *   Activation Function (optional)      Activation Function (optional)
+ *              |                                 |
+ *   Transpose (NCHW -> NHWC)            Transpose (NCHW -> NHWC)
+ *
+ */
+class Decompose2DConv : public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    Decompose2DConv();
+};
+
+/**
+ * @brief Decompose a 2D convolution wrapped with transposes, with bias after trailing transpose,
+ * to a set of valid 1D convolutions with padding added in front of the set:
+ *
+ *                                        Padding
+ *                                           |
+ *   Transpose (NHWC -> NCHW)            Transpose (NHWC -> NCHW)
+ *              |                                 |
+ *   Convolution with padding            Valid convolution
+ *              |                                 |
+ *   Transpose (NCHW -> NHWC)            Transpose (NCHW -> NHWC)
+ *              |                                 |
+ *   Broadcast Bias                      Broadcast Bias
+ *
+ */
+class Decompose2DConvTransposedWithBias : public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    Decompose2DConvTransposedWithBias();
+};
+
+/**
+ * @brief Decompose a 2D convolution wrapped with transposes, with bias
+ * to a set of valid 1D convolutions with padding added in front of the set:
+ *
+ *                                        Padding
+ *                                           |
+ *   Transpose (NHWC -> NCHW)            Transpose (NHWC -> NCHW)
+ *              |                                 |
+ *   Convolution with padding            Valid convolution
+ *              |                                 |
+ *   Transpose (NCHW -> NHWC)            Transpose (NCHW -> NHWC)
+ *              |                                 |
+ *   Broadcast Bias                      Broadcast Bias
+ *              |                                 |
+ *   Activation Function                 Activation Function
+ *
+ */
+class Decompose2DConvTransposedWithBiasAF : public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    Decompose2DConvTransposedWithBiasAF();
+};
+
+} // namespace GNAPluginNS
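The three matcher passes declared above are ordinary ngraph MatcherPass classes, so they are meant to be run through a pass manager by the GNA plugin. The snippet below is only an illustrative sketch of how such passes are typically registered; the actual plugin pipeline, pass order and surrounding passes may differ:

#include <ngraph/pass/manager.hpp>
#include "transformations/decompose_2d_conv.hpp"

// Hypothetical helper (not part of the patch): registers the decomposition passes on a pass manager.
void RegisterDecompose2DConvPasses(ngraph::pass::Manager& manager) {
    manager.register_pass<GNAPluginNS::Decompose2DConv>();
    manager.register_pass<GNAPluginNS::Decompose2DConvTransposedWithBias>();
    manager.register_pass<GNAPluginNS::Decompose2DConvTransposedWithBiasAF>();
}
// Usage sketch: ngraph::pass::Manager m; RegisterDecompose2DConvPasses(m); m.run_passes(function);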
diff --git a/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.cpp b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.cpp
new file mode 100644
index 00000000000..79fe863a18f
--- /dev/null
+++ b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.cpp
@@ -0,0 +1,75 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+#include
+#include
+#include "transformation_helper.hpp"
+
+
+namespace GNAPluginNS {
+
+void GetConvData(std::shared_ptr conv, ConvData& conv_data) {
+    conv_data.output_height = conv->get_output_shape(0)[2];
+    conv_data.output_width = conv->get_output_shape(0)[3];
+    conv_data.input_channel_count = conv->input_value(0).get_shape()[1];
+    conv_data.input_height = conv->input_value(0).get_shape()[2];
+    conv_data.input_width = conv->input_value(0).get_shape()[3];
+    conv_data.filter_count = conv->input_value(1).get_shape()[0];
+    conv_data.filter_channel_count = conv->input_value(1).get_shape()[1];
+    conv_data.filter_height = conv->input_value(1).get_shape()[2];
+    conv_data.filter_width = conv->input_value(1).get_shape()[3];
+    conv_data.filter_dilation_height = conv->get_dilations()[0];
+    conv_data.filter_dilation_width = conv->get_dilations()[1];
+    conv_data.filter_stride_height = conv->get_strides()[0];
+    conv_data.filter_stride_width = conv->get_strides()[1];
+    conv_data.output_channel_count = conv_data.filter_count;
+    conv_data.pads_begin_height = conv->get_pads_begin()[0];
+    conv_data.pads_begin_width = conv->get_pads_begin()[1];
+    conv_data.pads_end_height = conv->get_pads_end()[0];
+    conv_data.pads_end_width = conv->get_pads_end()[1];
+    conv_data.padding_type = conv->get_auto_pad();
+    conv_data.element_type = conv->get_element_type();
+}
+
+std::function)> consumers_and_rank(const size_t expected_count, const ngraph::Dimension& expected_rank) {
+    return [=](ngraph::Output output) -> bool {
+        return ngraph::pattern::consumers_count(expected_count)(output) && ngraph::pattern::rank_equals(expected_rank)(output);
+    };
+}
+
+bool TransposeOrderMatches(std::shared_ptr transpose, std::vector order) {
+    if (!transpose)
+        return false;
+    const ngraph::Output& transpose_order = transpose->input_value(1);
+    auto transpose_order_dim = transpose_order.get_shape().size();
+
+    if (transpose_order_dim != 1 || transpose_order.get_shape()[0] != order.size())
+        return false;
+
+    auto const_with_order_values = std::dynamic_pointer_cast(transpose_order.get_node_shared_ptr());
+    if (!const_with_order_values)
+        return false;
+
+    const auto data = const_with_order_values->cast_vector();
+    if (data.empty())
+        return false;
+
+    if (!std::equal(order.begin(), order.end(), data.begin()))
+        return false;
+
+    return true;
+}
+
+std::shared_ptr FlatCrop(ngraph::Output input, size_t offset, size_t size) {
+    return std::make_shared(
+        input,  // data
+        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset}),  // begin slice index
+        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset + size}),  // end slice index
+        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)1, (size_t)1}),  // strides
+        std::vector{1, 0},  // begin mask
+        std::vector{1, 0});  // end mask
+}
+
+} // namespace GNAPluginNS
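FlatCrop above keeps the data in a flattened 1 x N row and slices only the second dimension: the begin/end masks {1, 0} tell StridedSlice to ignore the bounds for dimension 0. A minimal usage sketch with illustrative shapes (not taken from the plugin or its tests):

#include <ngraph/opsets/opset7.hpp>
#include "transformation_helper.hpp"

// Crop 4 elements starting at offset 4 from a flattened 1 x 12 row.
// The resulting StridedSlice gets begin = {0, 4}, end = {0, 8}, strides = {1, 1},
// so the output shape is {1, 4}.
std::shared_ptr<ngraph::Node> MakeExampleCrop() {
    auto row = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{1, 12});
    return GNAPluginNS::FlatCrop(row->output(0), 4 /* offset */, 4 /* size */);
}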
diff --git a/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.hpp b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.hpp
new file mode 100644
index 00000000000..14fca200f7b
--- /dev/null
+++ b/inference-engine/src/gna_plugin/transformations/utils/transformation_helper.hpp
@@ -0,0 +1,64 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+namespace GNAPluginNS {
+
+struct ConvData {
+    size_t input_height;
+    size_t input_width;
+    size_t input_channel_count;
+    size_t filter_height;
+    size_t filter_width;
+    size_t filter_count;
+    size_t filter_channel_count;
+    size_t filter_dilation_height;
+    size_t filter_dilation_width;
+    size_t filter_stride_height;
+    size_t filter_stride_width;
+    size_t output_height;
+    size_t output_width;
+    size_t output_channel_count;
+    size_t pads_begin_width;
+    size_t pads_begin_height;
+    size_t pads_end_width;
+    size_t pads_end_height;
+    ngraph::op::PadType padding_type;
+    ngraph::element::Type element_type;
+};
+
+/**
+ * @brief gets all convolution related data into a struct for further processing
+ * @param conv convolution node to get data of
+ * @param conv_data convolution data structure to put data into
+ * @return void
+ */
+void GetConvData(std::shared_ptr conv, ConvData& conv_data);
+
+/**
+ * @brief ngraph matcher predicate fusing the existing predicates for consumer count and rank of a layer
+ * @param expected_count expected consumer count of the node
+ * @param expected_rank expected node rank
+ * @return predicate function wrapper
+ */
+std::function)> consumers_and_rank(const size_t expected_count, const ngraph::Dimension& expected_rank);
+
+/**
+ * @brief checks whether a transpose matches a given order
+ * @param transpose transpose layer
+ * @param order order of transposition to be compared with
+ * @return true if the order matches, false otherwise
+ */
+bool TransposeOrderMatches(std::shared_ptr transpose, std::vector order);
+
+/**
+ * @brief performs a crop of a flattened input tensor
+ * @param input input layer
+ * @param offset offset to start the crop at
+ * @param size size of the crop
+ * @return pointer to the newly created slice
+ */
+std::shared_ptr FlatCrop(ngraph::Output input, size_t offset, size_t size);
+} // namespace GNAPluginNS
diff --git a/inference-engine/src/hetero_plugin/hetero_infer_request.cpp b/inference-engine/src/hetero_plugin/hetero_infer_request.cpp
index 7171363e783..2b8d2f4f261 100644
--- a/inference-engine/src/hetero_plugin/hetero_infer_request.cpp
+++ b/inference-engine/src/hetero_plugin/hetero_infer_request.cpp
@@ -77,7 +77,7 @@ void HeteroInferRequest::SetBlob(const std::string& name, const InferenceEngine:
 if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
 r->SetBlob(name, data, foundInput->getPreProcess());
 }
- } catch (const InferenceEngine::NotFound& ex) {}
+ } catch (const InferenceEngine::NotFound&) {}
 }
 }
diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt
index bf3acd4d466..1152c12392e 100644
--- a/inference-engine/src/inference_engine/CMakeLists.txt
+++ b/inference-engine/src/inference_engine/CMakeLists.txt
@@ -12,11 +12,11 @@ if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
 endif()
 file (GLOB LIBRARY_SRC
- ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/cpp/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/threading/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/cpp/*.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/cpp_interfaces/interface/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/threading/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp/*.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp_interfaces/interface/*.cpp
 )
 # TODO: WA for OneHot pass usage in reshape
@@ -30,38 +30,38 @@ set(LEGACY_LIBRARY_SHARED_SRCS
 set_source_files_properties(${LEGACY_LIBRARY_SHARED_SRCS} PROPERTIES COMPILE_DEFINITIONS "USE_STATIC_IE")
-set(IE_STATIC_DEPENDENT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/file_utils.cpp)
+set(IE_STATIC_DEPENDENT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/file_utils.cpp)
 list(REMOVE_ITEM LIBRARY_SRC ${IE_STATIC_DEPENDENT_FILES})
 file (GLOB LIBRARY_HEADERS
${LIBRARY_HEADERS} - ${CMAKE_CURRENT_SOURCE_DIR}/os/lin/*.hpp) + ${CMAKE_CURRENT_SOURCE_DIR}/src/os/lin/*.hpp) elseif (UNIX) list (APPEND LIBRARY_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/os/lin/lin_shared_object_loader.cpp) + ${CMAKE_CURRENT_SOURCE_DIR}/src/os/lin/lin_shared_object_loader.cpp) endif() if (WIN32) file (GLOB LIBRARY_SRC ${LIBRARY_SRC} - ${CMAKE_CURRENT_SOURCE_DIR}/os/win/*.cpp) + ${CMAKE_CURRENT_SOURCE_DIR}/src/os/win/*.cpp) file (GLOB LIBRARY_HEADERS ${LIBRARY_HEADERS} - ${CMAKE_CURRENT_SOURCE_DIR}/os/win/*.hpp) + ${CMAKE_CURRENT_SOURCE_DIR}/src/os/win/*.hpp) endif() if(ENABLE_SSE42) - file(GLOB SSE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/*.cpp) - file(GLOB SSE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/*.hpp) + file(GLOB SSE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/cpu_x86_sse42/*.cpp) + file(GLOB SSE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/src/cpu_x86_sse42/*.hpp) list(APPEND LIBRARY_HEADERS ${SSE_HEADERS}) list(APPEND LIBRARY_SRC ${SSE_SRC}) @@ -75,9 +75,9 @@ if(ENABLE_SSE42) endif() endif() -addVersionDefines(ie_version.cpp CI_BUILD_NUMBER) +addVersionDefines(src/ie_version.cpp CI_BUILD_NUMBER) -set (PUBLIC_HEADERS_DIR "${IE_MAIN_SOURCE_DIR}/include") +set (PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include/ie") file (GLOB_RECURSE PUBLIC_HEADERS ${PUBLIC_HEADERS_DIR}/*.hpp @@ -116,7 +116,7 @@ add_library(${TARGET_NAME}_obj OBJECT ${PUBLIC_HEADERS}) ie_faster_build(${TARGET_NAME}_obj - UNITY PCH PRIVATE "precomp.hpp" + UNITY PCH PRIVATE "src/precomp.hpp" ) target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API @@ -128,7 +128,7 @@ target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE $ $) -target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" +target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src" "${IE_MAIN_SOURCE_DIR}/src/readers/ir_reader" # for ie_ir_version.hpp $ $ @@ -162,11 +162,12 @@ if (TBBBIND_2_4_FOUND) endif() target_link_libraries(${TARGET_NAME} PRIVATE pugixml::static openvino::itt ${CMAKE_DL_LIBS} Threads::Threads - ngraph ngraph::frontend_manager::static inference_engine_transformations) + ngraph::frontend_manager::static inference_engine_transformations + PUBLIC ngraph) target_include_directories(${TARGET_NAME} INTERFACE $ - $ + $ PRIVATE $ $) @@ -194,7 +195,7 @@ if (TBBBIND_2_4_FOUND) target_link_libraries(${TARGET_NAME}_s PRIVATE ${TBBBIND_2_4_IMPORTED_TARGETS}) endif() -target_include_directories(${TARGET_NAME}_s PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}" +target_include_directories(${TARGET_NAME}_s PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/src" $ "${IE_MAIN_SOURCE_DIR}/src/legacy_api/src") @@ -216,7 +217,7 @@ set_target_properties(${TARGET_NAME} ${TARGET_NAME}_obj ${TARGET_NAME}_s # Export for build tree -export(TARGETS ${TARGET_NAME} NAMESPACE IE:: +export(TARGETS ngraph ${TARGET_NAME} NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/InferenceEngineTargets.cmake") # Export for developer package @@ -229,7 +230,14 @@ list(APPEND core_components ngraph) list(APPEND PATH_VARS "IE_INCLUDE_DIR" "IE_NGRAPH_DIR" "IE_PARALLEL_CMAKE") -if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBBROOT MATCHES ${TEMP}) +# define variables for InferenceEngineConfig.cmake +if(THREADING MATCHES "^(TBB|TBB_AUTO)$") + set(IE_TBB_DIR "${TBB_DIR}") + list(APPEND PATH_VARS "IE_TBB_DIR") +endif() + +# install only downloaded TBB, system one is not installed +if(THREADING MATCHES "^(TBB|TBB_AUTO)$" AND TBBROOT MATCHES ${TEMP}) ie_cpack_add_component(tbb REQUIRED) list(APPEND 
core_components tbb) @@ -249,8 +257,6 @@ if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBBROOT MATCH COMPONENT tbb) set(IE_TBB_DIR_INSTALL "external/tbb/cmake") - set(IE_TBB_DIR "${TBB_DIR}") - list(APPEND PATH_VARS "IE_TBB_DIR") install(FILES "${TBB}/cmake/TBBConfig.cmake" "${TBB}/cmake/TBBConfigVersion.cmake" @@ -263,7 +269,7 @@ endif() ie_cpack_add_component(core REQUIRED DEPENDS ${core_components}) ie_cpack_add_component(core_dev REQUIRED core ngraph_dev) -install(DIRECTORY "${PUBLIC_HEADERS_DIR}" DESTINATION ${IE_CPACK_IE_DIR} +install(DIRECTORY "${PUBLIC_HEADERS_DIR}" DESTINATION ${IE_CPACK_IE_DIR}/include COMPONENT core_dev) install(TARGETS ${TARGET_NAME} EXPORT InferenceEngineTargets @@ -301,7 +307,7 @@ configure_package_config_file("${OpenVINO_SOURCE_DIR}/cmake/templates/InferenceE INSTALL_DESTINATION "${CMAKE_INSTALL_PREFIX}" PATH_VARS ${PATH_VARS}) -set(IE_INCLUDE_DIR "include") +set(IE_INCLUDE_DIR "include/ie") set(IE_NGRAPH_DIR "../ngraph/cmake") set(IE_TBB_DIR "${IE_TBB_DIR_INSTALL}") set(IE_PARALLEL_CMAKE "share/ie_parallel.cmake") diff --git a/inference-engine/include/cldnn/cldnn_config.hpp b/inference-engine/src/inference_engine/include/ie/cldnn/cldnn_config.hpp similarity index 100% rename from inference-engine/include/cldnn/cldnn_config.hpp rename to inference-engine/src/inference_engine/include/ie/cldnn/cldnn_config.hpp diff --git a/inference-engine/include/cpp/ie_cnn_network.h b/inference-engine/src/inference_engine/include/ie/cpp/ie_cnn_network.h similarity index 99% rename from inference-engine/include/cpp/ie_cnn_network.h rename to inference-engine/src/inference_engine/include/ie/cpp/ie_cnn_network.h index 1fe5d2173f2..ef73b67e504 100644 --- a/inference-engine/include/cpp/ie_cnn_network.h +++ b/inference-engine/src/inference_engine/include/ie/cpp/ie_cnn_network.h @@ -20,12 +20,7 @@ #include "ie_common.h" #include "ie_data.h" #include "ie_extension.h" - -namespace ngraph { - -class Function; - -} // namespace ngraph +#include namespace InferenceEngine { diff --git a/inference-engine/include/cpp/ie_executable_network.hpp b/inference-engine/src/inference_engine/include/ie/cpp/ie_executable_network.hpp similarity index 100% rename from inference-engine/include/cpp/ie_executable_network.hpp rename to inference-engine/src/inference_engine/include/ie/cpp/ie_executable_network.hpp diff --git a/inference-engine/include/cpp/ie_infer_request.hpp b/inference-engine/src/inference_engine/include/ie/cpp/ie_infer_request.hpp similarity index 100% rename from inference-engine/include/cpp/ie_infer_request.hpp rename to inference-engine/src/inference_engine/include/ie/cpp/ie_infer_request.hpp diff --git a/inference-engine/include/cpp/ie_memory_state.hpp b/inference-engine/src/inference_engine/include/ie/cpp/ie_memory_state.hpp similarity index 100% rename from inference-engine/include/cpp/ie_memory_state.hpp rename to inference-engine/src/inference_engine/include/ie/cpp/ie_memory_state.hpp diff --git a/inference-engine/include/details/ie_blob_iterator.hpp b/inference-engine/src/inference_engine/include/ie/details/ie_blob_iterator.hpp similarity index 100% rename from inference-engine/include/details/ie_blob_iterator.hpp rename to inference-engine/src/inference_engine/include/ie/details/ie_blob_iterator.hpp diff --git a/inference-engine/include/details/ie_exception.hpp b/inference-engine/src/inference_engine/include/ie/details/ie_exception.hpp similarity index 100% rename from inference-engine/include/details/ie_exception.hpp rename to 
inference-engine/src/inference_engine/include/ie/details/ie_exception.hpp diff --git a/inference-engine/include/details/ie_pre_allocator.hpp b/inference-engine/src/inference_engine/include/ie/details/ie_pre_allocator.hpp similarity index 100% rename from inference-engine/include/details/ie_pre_allocator.hpp rename to inference-engine/src/inference_engine/include/ie/details/ie_pre_allocator.hpp diff --git a/inference-engine/include/details/ie_so_loader.h b/inference-engine/src/inference_engine/include/ie/details/ie_so_loader.h similarity index 100% rename from inference-engine/include/details/ie_so_loader.h rename to inference-engine/src/inference_engine/include/ie/details/ie_so_loader.h diff --git a/inference-engine/include/details/ie_so_pointer.hpp b/inference-engine/src/inference_engine/include/ie/details/ie_so_pointer.hpp similarity index 100% rename from inference-engine/include/details/ie_so_pointer.hpp rename to inference-engine/src/inference_engine/include/ie/details/ie_so_pointer.hpp diff --git a/inference-engine/include/gna/gna_config.hpp b/inference-engine/src/inference_engine/include/ie/gna/gna_config.hpp similarity index 100% rename from inference-engine/include/gna/gna_config.hpp rename to inference-engine/src/inference_engine/include/ie/gna/gna_config.hpp diff --git a/inference-engine/include/gpu/details/gpu_context_helpers.hpp b/inference-engine/src/inference_engine/include/ie/gpu/details/gpu_context_helpers.hpp similarity index 100% rename from inference-engine/include/gpu/details/gpu_context_helpers.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/details/gpu_context_helpers.hpp diff --git a/inference-engine/include/gpu/gpu_config.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_config.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp diff --git a/inference-engine/include/gpu/gpu_context_api_dx.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_dx.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_context_api_dx.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_dx.hpp diff --git a/inference-engine/include/gpu/gpu_context_api_ocl.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_ocl.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_context_api_ocl.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_ocl.hpp diff --git a/inference-engine/include/gpu/gpu_context_api_va.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_va.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_context_api_va.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/gpu_context_api_va.hpp diff --git a/inference-engine/include/gpu/gpu_ocl_wrapper.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_ocl_wrapper.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_ocl_wrapper.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/gpu_ocl_wrapper.hpp diff --git a/inference-engine/include/gpu/gpu_params.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_params.hpp similarity index 100% rename from inference-engine/include/gpu/gpu_params.hpp rename to inference-engine/src/inference_engine/include/ie/gpu/gpu_params.hpp diff --git a/inference-engine/include/hetero/hetero_plugin_config.hpp 
b/inference-engine/src/inference_engine/include/ie/hetero/hetero_plugin_config.hpp similarity index 100% rename from inference-engine/include/hetero/hetero_plugin_config.hpp rename to inference-engine/src/inference_engine/include/ie/hetero/hetero_plugin_config.hpp diff --git a/inference-engine/include/ie_allocator.hpp b/inference-engine/src/inference_engine/include/ie/ie_allocator.hpp similarity index 100% rename from inference-engine/include/ie_allocator.hpp rename to inference-engine/src/inference_engine/include/ie/ie_allocator.hpp diff --git a/inference-engine/include/ie_api.h b/inference-engine/src/inference_engine/include/ie/ie_api.h similarity index 100% rename from inference-engine/include/ie_api.h rename to inference-engine/src/inference_engine/include/ie/ie_api.h diff --git a/inference-engine/include/ie_blob.h b/inference-engine/src/inference_engine/include/ie/ie_blob.h similarity index 100% rename from inference-engine/include/ie_blob.h rename to inference-engine/src/inference_engine/include/ie/ie_blob.h diff --git a/inference-engine/include/ie_common.h b/inference-engine/src/inference_engine/include/ie/ie_common.h similarity index 100% rename from inference-engine/include/ie_common.h rename to inference-engine/src/inference_engine/include/ie/ie_common.h diff --git a/inference-engine/include/ie_compound_blob.h b/inference-engine/src/inference_engine/include/ie/ie_compound_blob.h similarity index 100% rename from inference-engine/include/ie_compound_blob.h rename to inference-engine/src/inference_engine/include/ie/ie_compound_blob.h diff --git a/inference-engine/include/ie_core.hpp b/inference-engine/src/inference_engine/include/ie/ie_core.hpp similarity index 100% rename from inference-engine/include/ie_core.hpp rename to inference-engine/src/inference_engine/include/ie/ie_core.hpp diff --git a/inference-engine/include/ie_data.h b/inference-engine/src/inference_engine/include/ie/ie_data.h similarity index 100% rename from inference-engine/include/ie_data.h rename to inference-engine/src/inference_engine/include/ie/ie_data.h diff --git a/inference-engine/include/ie_extension.h b/inference-engine/src/inference_engine/include/ie/ie_extension.h similarity index 99% rename from inference-engine/include/ie_extension.h rename to inference-engine/src/inference_engine/include/ie/ie_extension.h index 8014d658d80..97184fd5ba4 100644 --- a/inference-engine/include/ie_extension.h +++ b/inference-engine/src/inference_engine/include/ie/ie_extension.h @@ -14,6 +14,7 @@ #include #include +#include #include "ie_iextension.h" #include "details/ie_so_pointer.hpp" diff --git a/inference-engine/include/ie_icnn_network.hpp b/inference-engine/src/inference_engine/include/ie/ie_icnn_network.hpp similarity index 99% rename from inference-engine/include/ie_icnn_network.hpp rename to inference-engine/src/inference_engine/include/ie/ie_icnn_network.hpp index ec640691ecc..62ef93824ee 100644 --- a/inference-engine/include/ie_icnn_network.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_icnn_network.hpp @@ -18,11 +18,7 @@ #include "ie_data.h" #include "ie_input_info.hpp" -namespace ngraph { - -class Function; - -} // namespace ngraph +#include namespace InferenceEngine { diff --git a/inference-engine/include/ie_iexecutable_network.hpp b/inference-engine/src/inference_engine/include/ie/ie_iexecutable_network.hpp similarity index 100% rename from inference-engine/include/ie_iexecutable_network.hpp rename to inference-engine/src/inference_engine/include/ie/ie_iexecutable_network.hpp diff --git 
a/inference-engine/include/ie_iextension.h b/inference-engine/src/inference_engine/include/ie/ie_iextension.h similarity index 98% rename from inference-engine/include/ie_iextension.h rename to inference-engine/src/inference_engine/include/ie/ie_iextension.h index d001b999081..be327c15376 100644 --- a/inference-engine/include/ie_iextension.h +++ b/inference-engine/src/inference_engine/include/ie/ie_iextension.h @@ -19,6 +19,7 @@ #include "ie_layouts.h" #include "ie_blob.h" #include "ie_version.hpp" +#include /** * @def INFERENCE_EXTENSION_API(TYPE) @@ -30,13 +31,6 @@ #define INFERENCE_EXTENSION_API(TYPE) INFERENCE_ENGINE_API(TYPE) #endif -namespace ngraph { - -class OpSet; -class Node; - -} // namespace ngraph - namespace InferenceEngine { /** diff --git a/inference-engine/include/ie_iinfer_request.hpp b/inference-engine/src/inference_engine/include/ie/ie_iinfer_request.hpp similarity index 100% rename from inference-engine/include/ie_iinfer_request.hpp rename to inference-engine/src/inference_engine/include/ie/ie_iinfer_request.hpp diff --git a/inference-engine/include/ie_input_info.hpp b/inference-engine/src/inference_engine/include/ie/ie_input_info.hpp similarity index 100% rename from inference-engine/include/ie_input_info.hpp rename to inference-engine/src/inference_engine/include/ie/ie_input_info.hpp diff --git a/inference-engine/include/ie_layouts.h b/inference-engine/src/inference_engine/include/ie/ie_layouts.h similarity index 97% rename from inference-engine/include/ie_layouts.h rename to inference-engine/src/inference_engine/include/ie/ie_layouts.h index 31c42e1d02a..42fe8fbca2c 100644 --- a/inference-engine/include/ie_layouts.h +++ b/inference-engine/src/inference_engine/include/ie/ie_layouts.h @@ -304,6 +304,14 @@ public: */ static Layout getLayoutByDims(const SizeVector& dims); + /** + * @brief Returns the standard layout for the specified tensor rank + * + * @param rank of the requested layout + * @return the standard memory layout + */ + static Layout getLayoutByRank(size_t rank); + private: /** * Memory layout diff --git a/inference-engine/include/ie_locked_memory.hpp b/inference-engine/src/inference_engine/include/ie/ie_locked_memory.hpp similarity index 100% rename from inference-engine/include/ie_locked_memory.hpp rename to inference-engine/src/inference_engine/include/ie/ie_locked_memory.hpp diff --git a/inference-engine/include/ie_parallel.hpp b/inference-engine/src/inference_engine/include/ie/ie_parallel.hpp similarity index 100% rename from inference-engine/include/ie_parallel.hpp rename to inference-engine/src/inference_engine/include/ie/ie_parallel.hpp diff --git a/inference-engine/include/ie_parameter.hpp b/inference-engine/src/inference_engine/include/ie/ie_parameter.hpp similarity index 99% rename from inference-engine/include/ie_parameter.hpp rename to inference-engine/src/inference_engine/include/ie/ie_parameter.hpp index 4aa6760d474..425673f45b0 100644 --- a/inference-engine/include/ie_parameter.hpp +++ b/inference-engine/src/inference_engine/include/ie/ie_parameter.hpp @@ -21,12 +21,6 @@ #include "ie_blob.h" -namespace ngraph { - -class Variant; - -} // namespace ngraph - namespace InferenceEngine { /** diff --git a/inference-engine/include/ie_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp similarity index 100% rename from inference-engine/include/ie_plugin_config.hpp rename to inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp diff --git a/inference-engine/include/ie_precision.hpp 
b/inference-engine/src/inference_engine/include/ie/ie_precision.hpp similarity index 100% rename from inference-engine/include/ie_precision.hpp rename to inference-engine/src/inference_engine/include/ie/ie_precision.hpp diff --git a/inference-engine/include/ie_preprocess.hpp b/inference-engine/src/inference_engine/include/ie/ie_preprocess.hpp similarity index 100% rename from inference-engine/include/ie_preprocess.hpp rename to inference-engine/src/inference_engine/include/ie/ie_preprocess.hpp diff --git a/inference-engine/include/ie_remote_context.hpp b/inference-engine/src/inference_engine/include/ie/ie_remote_context.hpp similarity index 100% rename from inference-engine/include/ie_remote_context.hpp rename to inference-engine/src/inference_engine/include/ie/ie_remote_context.hpp diff --git a/inference-engine/include/ie_transformations.hpp b/inference-engine/src/inference_engine/include/ie/ie_transformations.hpp similarity index 100% rename from inference-engine/include/ie_transformations.hpp rename to inference-engine/src/inference_engine/include/ie/ie_transformations.hpp diff --git a/inference-engine/include/ie_version.hpp b/inference-engine/src/inference_engine/include/ie/ie_version.hpp similarity index 100% rename from inference-engine/include/ie_version.hpp rename to inference-engine/src/inference_engine/include/ie/ie_version.hpp diff --git a/inference-engine/include/inference_engine.hpp b/inference-engine/src/inference_engine/include/ie/inference_engine.hpp similarity index 100% rename from inference-engine/include/inference_engine.hpp rename to inference-engine/src/inference_engine/include/ie/inference_engine.hpp diff --git a/inference-engine/include/multi-device/multi_device_config.hpp b/inference-engine/src/inference_engine/include/ie/multi-device/multi_device_config.hpp similarity index 100% rename from inference-engine/include/multi-device/multi_device_config.hpp rename to inference-engine/src/inference_engine/include/ie/multi-device/multi_device_config.hpp diff --git a/inference-engine/include/vpu/hddl_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/hddl_config.hpp similarity index 100% rename from inference-engine/include/vpu/hddl_config.hpp rename to inference-engine/src/inference_engine/include/ie/vpu/hddl_config.hpp diff --git a/inference-engine/include/vpu/hddl_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/hddl_plugin_config.hpp similarity index 100% rename from inference-engine/include/vpu/hddl_plugin_config.hpp rename to inference-engine/src/inference_engine/include/ie/vpu/hddl_plugin_config.hpp diff --git a/inference-engine/include/vpu/myriad_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/myriad_config.hpp similarity index 100% rename from inference-engine/include/vpu/myriad_config.hpp rename to inference-engine/src/inference_engine/include/ie/vpu/myriad_config.hpp diff --git a/inference-engine/include/vpu/myriad_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/myriad_plugin_config.hpp similarity index 100% rename from inference-engine/include/vpu/myriad_plugin_config.hpp rename to inference-engine/src/inference_engine/include/ie/vpu/myriad_plugin_config.hpp diff --git a/inference-engine/include/vpu/vpu_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/vpu_config.hpp similarity index 100% rename from inference-engine/include/vpu/vpu_config.hpp rename to inference-engine/src/inference_engine/include/ie/vpu/vpu_config.hpp diff --git 
a/inference-engine/include/vpu/vpu_plugin_config.hpp b/inference-engine/src/inference_engine/include/ie/vpu/vpu_plugin_config.hpp similarity index 100% rename from inference-engine/include/vpu/vpu_plugin_config.hpp rename to inference-engine/src/inference_engine/include/ie/vpu/vpu_plugin_config.hpp diff --git a/inference-engine/src/inference_engine/blob_factory.cpp b/inference-engine/src/inference_engine/src/blob_factory.cpp similarity index 100% rename from inference-engine/src/inference_engine/blob_factory.cpp rename to inference-engine/src/inference_engine/src/blob_factory.cpp diff --git a/inference-engine/src/inference_engine/blob_transform.cpp b/inference-engine/src/inference_engine/src/blob_transform.cpp similarity index 100% rename from inference-engine/src/inference_engine/blob_transform.cpp rename to inference-engine/src/inference_engine/src/blob_transform.cpp diff --git a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp similarity index 96% rename from inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp rename to inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp index 1f05ca0098c..f53894e7d2d 100644 --- a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp +++ b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.cpp @@ -35,6 +35,9 @@ #include +#include +#include + #include "ie_ngraph_utils.hpp" #include "exec_graph_info.hpp" #include "ie_itt.hpp" @@ -88,12 +91,12 @@ void CNNNetworkNGraphImpl::createDataForResult(const ::ngraph::Output<::ngraph:: void CNNNetworkNGraphImpl::validateFunctionNames() const { // nGraph function parameters and pre-Results operations should have unique names - std::unordered_set unique_names; + std::unordered_map> unique_names; for (const auto& param : _ngraph_function->get_parameters()) { if (unique_names.count(param->get_friendly_name())) { IE_THROW() << "Function contains several inputs with one friendly name!"; } - unique_names.insert(param->get_friendly_name()); + unique_names.insert({param->get_friendly_name(), param}); } for (const auto& result : _ngraph_function->get_results()) { const auto& parent = result->get_input_node_shared_ptr(0); @@ -101,10 +104,10 @@ void CNNNetworkNGraphImpl::validateFunctionNames() const { if (parent->get_output_size() > 1) { name += "." 
+ std::to_string(result->get_input_source_output(0).get_index()); } - if (unique_names.count(name) && !ngraph::op::is_parameter(parent)) { - IE_THROW() << "Function contains several inputs and outputs with one friendly name!"; + if (unique_names.count(name) && !ngraph::op::is_parameter(parent) && parent != unique_names.at(name)) { + IE_THROW() << "Function contains several inputs and outputs with one friendly name: " << name; } - unique_names.insert(name); + unique_names.insert({name, parent}); } } @@ -364,13 +367,10 @@ CNNNetworkNGraphImpl::reshape(const std::map& bool parameter_replaced = false; for (size_t i = 0; i < params.size(); i++) { - const auto& param = params[i]; + auto& param = params[i]; if (inputShapes.find(param->get_friendly_name()) == inputShapes.end()) continue; - ::ngraph::PartialShape shape(inputShapes.at(param->get_friendly_name())); - auto newParam = std::make_shared<::ngraph::op::Parameter>(param->get_element_type(), shape); - newParam->set_friendly_name(param->get_friendly_name()); - _ngraph_function->replace_parameter(i, newParam); + param->set_partial_shape(inputShapes.at(param->get_friendly_name())); parameter_replaced = true; } if (parameter_replaced) @@ -392,6 +392,8 @@ CNNNetworkNGraphImpl::reshape(const std::map& ::ngraph::pass::Manager manager; // resolves dynamism by replacing dynamic operation with static version manager.register_pass<::ngraph::pass::ConvertNMS5ToLegacyMatcher>(false); + manager.register_pass<::ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(); + manager.register_pass<::ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(); manager.register_pass<::ngraph::pass::DisableConvertConstantFoldingOnConstPath>(); manager.register_pass<::ngraph::pass::ConstantFolding>(); // OneHotToLegacy changes output precision diff --git a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.hpp b/inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.hpp similarity index 100% rename from inference-engine/src/inference_engine/cnn_network_ngraph_impl.hpp rename to inference-engine/src/inference_engine/src/cnn_network_ngraph_impl.hpp diff --git a/inference-engine/src/inference_engine/compilation_context.cpp b/inference-engine/src/inference_engine/src/compilation_context.cpp similarity index 100% rename from inference-engine/src/inference_engine/compilation_context.cpp rename to inference-engine/src/inference_engine/src/compilation_context.cpp diff --git a/inference-engine/src/inference_engine/compilation_context.hpp b/inference-engine/src/inference_engine/src/compilation_context.hpp similarity index 100% rename from inference-engine/src/inference_engine/compilation_context.hpp rename to inference-engine/src/inference_engine/src/compilation_context.hpp diff --git a/inference-engine/src/inference_engine/cpp/exception2status.hpp b/inference-engine/src/inference_engine/src/cpp/exception2status.hpp similarity index 100% rename from inference-engine/src/inference_engine/cpp/exception2status.hpp rename to inference-engine/src/inference_engine/src/cpp/exception2status.hpp diff --git a/inference-engine/src/inference_engine/cpp/ie_cnn_network.cpp b/inference-engine/src/inference_engine/src/cpp/ie_cnn_network.cpp similarity index 100% rename from inference-engine/src/inference_engine/cpp/ie_cnn_network.cpp rename to inference-engine/src/inference_engine/src/cpp/ie_cnn_network.cpp diff --git a/inference-engine/src/inference_engine/cpp/ie_executable_network.cpp b/inference-engine/src/inference_engine/src/cpp/ie_executable_network.cpp similarity index 100% 
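The CNNNetworkNGraphImpl::reshape change above stops rebuilding Parameter nodes and instead updates each matched Parameter in place via set_partial_shape. A minimal sketch of that flow on a bare ngraph::Function; the helper name and signature below are illustrative, not the Inference Engine API:

#include <ngraph/function.hpp>

// Update the shape of the parameter with the given friendly name and re-run shape inference.
void ReshapeInput(const std::shared_ptr<ngraph::Function>& function,
                  const std::string& input_name,
                  const ngraph::PartialShape& new_shape) {
    for (const auto& param : function->get_parameters()) {
        if (param->get_friendly_name() == input_name) {
            param->set_partial_shape(new_shape);  // keeps the node, its name and its consumers intact
        }
    }
    function->validate_nodes_and_infer_types();   // propagate the new shape through the graph
}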
rename from inference-engine/src/inference_engine/cpp/ie_executable_network.cpp rename to inference-engine/src/inference_engine/src/cpp/ie_executable_network.cpp diff --git a/inference-engine/src/inference_engine/cpp/ie_executable_network_base.hpp b/inference-engine/src/inference_engine/src/cpp/ie_executable_network_base.hpp similarity index 100% rename from inference-engine/src/inference_engine/cpp/ie_executable_network_base.hpp rename to inference-engine/src/inference_engine/src/cpp/ie_executable_network_base.hpp diff --git a/inference-engine/src/inference_engine/cpp/ie_infer_async_request_base.hpp b/inference-engine/src/inference_engine/src/cpp/ie_infer_async_request_base.hpp similarity index 100% rename from inference-engine/src/inference_engine/cpp/ie_infer_async_request_base.hpp rename to inference-engine/src/inference_engine/src/cpp/ie_infer_async_request_base.hpp diff --git a/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp b/inference-engine/src/inference_engine/src/cpp/ie_infer_request.cpp similarity index 99% rename from inference-engine/src/inference_engine/cpp/ie_infer_request.cpp rename to inference-engine/src/inference_engine/src/cpp/ie_infer_request.cpp index 9e68666b7a3..f94a3b6ba1c 100644 --- a/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp +++ b/inference-engine/src/inference_engine/src/cpp/ie_infer_request.cpp @@ -127,7 +127,7 @@ void InferRequest::SetCompletionCallbackImpl(std::function { plugin.ImportNetwork(networkStream, config); networkIsImported = true; }); - } catch (const HeaderException& ex) { + } catch (const HeaderException&) { // For these exceptions just remove old cache and set that import didn't work cacheManager->removeCacheEntry(blobId); networkIsImported = false; diff --git a/inference-engine/src/inference_engine/ie_data.cpp b/inference-engine/src/inference_engine/src/ie_data.cpp similarity index 100% rename from inference-engine/src/inference_engine/ie_data.cpp rename to inference-engine/src/inference_engine/src/ie_data.cpp diff --git a/inference-engine/src/inference_engine/ie_itt.hpp b/inference-engine/src/inference_engine/src/ie_itt.hpp similarity index 100% rename from inference-engine/src/inference_engine/ie_itt.hpp rename to inference-engine/src/inference_engine/src/ie_itt.hpp diff --git a/inference-engine/src/inference_engine/ie_layouts.cpp b/inference-engine/src/inference_engine/src/ie_layouts.cpp similarity index 99% rename from inference-engine/src/inference_engine/ie_layouts.cpp rename to inference-engine/src/inference_engine/src/ie_layouts.cpp index b566693c155..a9308877e7d 100644 --- a/inference-engine/src/inference_engine/ie_layouts.cpp +++ b/inference-engine/src/inference_engine/src/ie_layouts.cpp @@ -161,8 +161,8 @@ bool TensorDesc::operator!=(const TensorDesc& rhs) const { return !(*this == rhs); } -Layout TensorDesc::getLayoutByDims(const SizeVector& dims) { - switch (dims.size()) { +Layout TensorDesc::getLayoutByRank(size_t rank) { + switch (rank) { case 0: return Layout::SCALAR; case 1: @@ -180,6 +180,10 @@ Layout TensorDesc::getLayoutByDims(const SizeVector& dims) { } } +Layout TensorDesc::getLayoutByDims(const SizeVector& dims) { + return getLayoutByRank(dims.size()); +} + size_t TensorDesc::offset(const SizeVector& v) const { if (layout == Layout::ANY) IE_THROW() << "Cannot calculate offset for any format!"; diff --git a/inference-engine/src/inference_engine/ie_memcpy.cpp b/inference-engine/src/inference_engine/src/ie_memcpy.cpp similarity index 100% rename from 
inference-engine/src/inference_engine/ie_memcpy.cpp rename to inference-engine/src/inference_engine/src/ie_memcpy.cpp diff --git a/inference-engine/src/inference_engine/ie_network_reader.cpp b/inference-engine/src/inference_engine/src/ie_network_reader.cpp similarity index 100% rename from inference-engine/src/inference_engine/ie_network_reader.cpp rename to inference-engine/src/inference_engine/src/ie_network_reader.cpp diff --git a/inference-engine/src/inference_engine/ie_network_reader.hpp b/inference-engine/src/inference_engine/src/ie_network_reader.hpp similarity index 100% rename from inference-engine/src/inference_engine/ie_network_reader.hpp rename to inference-engine/src/inference_engine/src/ie_network_reader.hpp diff --git a/inference-engine/src/inference_engine/ie_ngraph_utils.cpp b/inference-engine/src/inference_engine/src/ie_ngraph_utils.cpp similarity index 100% rename from inference-engine/src/inference_engine/ie_ngraph_utils.cpp rename to inference-engine/src/inference_engine/src/ie_ngraph_utils.cpp diff --git a/inference-engine/src/inference_engine/ie_system_conf.cpp b/inference-engine/src/inference_engine/src/ie_system_conf.cpp similarity index 100% rename from inference-engine/src/inference_engine/ie_system_conf.cpp rename to inference-engine/src/inference_engine/src/ie_system_conf.cpp diff --git a/inference-engine/src/inference_engine/ie_transformations.cpp b/inference-engine/src/inference_engine/src/ie_transformations.cpp similarity index 100% rename from inference-engine/src/inference_engine/ie_transformations.cpp rename to inference-engine/src/inference_engine/src/ie_transformations.cpp diff --git a/inference-engine/src/inference_engine/ie_version.cpp b/inference-engine/src/inference_engine/src/ie_version.cpp similarity index 100% rename from inference-engine/src/inference_engine/ie_version.cpp rename to inference-engine/src/inference_engine/src/ie_version.cpp diff --git a/inference-engine/src/inference_engine/os/lin/lin_shared_object_loader.cpp b/inference-engine/src/inference_engine/src/os/lin/lin_shared_object_loader.cpp similarity index 100% rename from inference-engine/src/inference_engine/os/lin/lin_shared_object_loader.cpp rename to inference-engine/src/inference_engine/src/os/lin/lin_shared_object_loader.cpp diff --git a/inference-engine/src/inference_engine/os/lin/lin_system_conf.cpp b/inference-engine/src/inference_engine/src/os/lin/lin_system_conf.cpp similarity index 100% rename from inference-engine/src/inference_engine/os/lin/lin_system_conf.cpp rename to inference-engine/src/inference_engine/src/os/lin/lin_system_conf.cpp diff --git a/inference-engine/src/inference_engine/os/win/win_shared_object_loader.cpp b/inference-engine/src/inference_engine/src/os/win/win_shared_object_loader.cpp similarity index 100% rename from inference-engine/src/inference_engine/os/win/win_shared_object_loader.cpp rename to inference-engine/src/inference_engine/src/os/win/win_shared_object_loader.cpp diff --git a/inference-engine/src/inference_engine/os/win/win_system_conf.cpp b/inference-engine/src/inference_engine/src/os/win/win_system_conf.cpp similarity index 100% rename from inference-engine/src/inference_engine/os/win/win_system_conf.cpp rename to inference-engine/src/inference_engine/src/os/win/win_system_conf.cpp diff --git a/inference-engine/src/inference_engine/precision_utils.cpp b/inference-engine/src/inference_engine/src/precision_utils.cpp similarity index 100% rename from inference-engine/src/inference_engine/precision_utils.cpp rename to 
inference-engine/src/inference_engine/src/precision_utils.cpp diff --git a/inference-engine/src/inference_engine/precomp.hpp b/inference-engine/src/inference_engine/src/precomp.hpp similarity index 100% rename from inference-engine/src/inference_engine/precomp.hpp rename to inference-engine/src/inference_engine/src/precomp.hpp diff --git a/inference-engine/src/inference_engine/system_allocator.cpp b/inference-engine/src/inference_engine/src/system_allocator.cpp similarity index 100% rename from inference-engine/src/inference_engine/system_allocator.cpp rename to inference-engine/src/inference_engine/src/system_allocator.cpp diff --git a/inference-engine/src/inference_engine/system_allocator.hpp b/inference-engine/src/inference_engine/src/system_allocator.hpp similarity index 100% rename from inference-engine/src/inference_engine/system_allocator.hpp rename to inference-engine/src/inference_engine/src/system_allocator.hpp diff --git a/inference-engine/src/inference_engine/threading/ie_cpu_streams_executor.cpp b/inference-engine/src/inference_engine/src/threading/ie_cpu_streams_executor.cpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_cpu_streams_executor.cpp rename to inference-engine/src/inference_engine/src/threading/ie_cpu_streams_executor.cpp diff --git a/inference-engine/src/inference_engine/threading/ie_executor_manager.cpp b/inference-engine/src/inference_engine/src/threading/ie_executor_manager.cpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_executor_manager.cpp rename to inference-engine/src/inference_engine/src/threading/ie_executor_manager.cpp diff --git a/inference-engine/src/inference_engine/threading/ie_istreams_executor.cpp b/inference-engine/src/inference_engine/src/threading/ie_istreams_executor.cpp similarity index 98% rename from inference-engine/src/inference_engine/threading/ie_istreams_executor.cpp rename to inference-engine/src/inference_engine/src/threading/ie_istreams_executor.cpp index 1a2993f3365..702a0beecee 100644 --- a/inference-engine/src/inference_engine/threading/ie_istreams_executor.cpp +++ b/inference-engine/src/inference_engine/src/threading/ie_istreams_executor.cpp @@ -123,11 +123,11 @@ Parameter IStreamsExecutor::Config::GetConfig(const std::string& key) { break; } } else if (key == CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) { - return {_streams}; + return {std::to_string(_streams)}; } else if (key == CONFIG_KEY(CPU_THREADS_NUM)) { - return {_threads}; + return {std::to_string(_threads)}; } else if (key == CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)) { - return {_threadsPerStream}; + return {std::to_string(_threadsPerStream)}; } else { IE_THROW() << "Wrong value for property key " << key; } diff --git a/inference-engine/src/inference_engine/threading/ie_itask_executor.cpp b/inference-engine/src/inference_engine/src/threading/ie_itask_executor.cpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_itask_executor.cpp rename to inference-engine/src/inference_engine/src/threading/ie_itask_executor.cpp diff --git a/inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.cpp b/inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.cpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.cpp rename to inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.cpp diff --git 
a/inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.hpp b/inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.hpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_parallel_custom_arena.hpp rename to inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.hpp diff --git a/inference-engine/src/inference_engine/threading/ie_thread_affinity.cpp b/inference-engine/src/inference_engine/src/threading/ie_thread_affinity.cpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_thread_affinity.cpp rename to inference-engine/src/inference_engine/src/threading/ie_thread_affinity.cpp diff --git a/inference-engine/src/inference_engine/threading/ie_thread_affinity.hpp b/inference-engine/src/inference_engine/src/threading/ie_thread_affinity.hpp similarity index 100% rename from inference-engine/src/inference_engine/threading/ie_thread_affinity.hpp rename to inference-engine/src/inference_engine/src/threading/ie_thread_affinity.hpp diff --git a/inference-engine/src/inference_engine/xml_parse_utils.cpp b/inference-engine/src/inference_engine/src/xml_parse_utils.cpp similarity index 100% rename from inference-engine/src/inference_engine/xml_parse_utils.cpp rename to inference-engine/src/inference_engine/src/xml_parse_utils.cpp diff --git a/inference-engine/src/legacy_api/CMakeLists.txt b/inference-engine/src/legacy_api/CMakeLists.txt index 28afe337e38..9de8bf16910 100644 --- a/inference-engine/src/legacy_api/CMakeLists.txt +++ b/inference-engine/src/legacy_api/CMakeLists.txt @@ -39,7 +39,7 @@ target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE target_include_directories(${TARGET_NAME}_obj PRIVATE ${PUBLIC_HEADERS_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src - ${IE_MAIN_SOURCE_DIR}/src/inference_engine # For CNNNetworkNGraphImpl + ${IE_MAIN_SOURCE_DIR}/src/inference_engine/src # For CNNNetworkNGraphImpl $ $ $ diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp index 7057fc1f597..e59ec61c8f4 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp @@ -19,7 +19,7 @@ namespace ngraph { namespace pass { namespace low_precision { -class LP_TRANSFORMATIONS_API Exception : std::exception { +class LP_TRANSFORMATIONS_API Exception : public std::exception { std::shared_ptr buffer; mutable std::string buffer_str; public: diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp index da226fe263b..fee17230569 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp @@ -16,6 +16,7 @@ public: NGRAPH_RTTI_DECLARATION; MultiplyTransformation(const Params& params = Params()); bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; + bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; } // namespace low_precision diff --git 
a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp index 77218320dba..3229c9814f0 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp @@ -151,7 +151,7 @@ public: static bool isQuantizeSupported(const std::shared_ptr& fakeQuantize); - static FakeQuantizeDequantization getDequantization(const std::shared_ptr& node, const size_t parentIndex = 0ul, const bool inPlace = false); + static FakeQuantizeDequantization getDequantization(const std::shared_ptr& node, const size_t parentIndex = 0ul, const bool inPlace = false); static FakeQuantizeDequantization getDequantizationBelow(const std::shared_ptr& node, const bool convertIsMandatory = false); @@ -186,7 +186,7 @@ public: static size_t getParentOutputIndex(const std::shared_ptr& parent, const std::shared_ptr& child); - static FakeQuantizeDequantizationValues createEmptyValues(const FakeQuantizeDequantization& dequantization); + static FakeQuantizeDequantizationValues createEmptyValues(const FakeQuantizeDequantization& dequantization, const element::Type precision); static bool isZeroConst(const std::shared_ptr& node); static bool checkZeroPoint(const std::shared_ptr& node, const DataPrecision& dataPrecision = DataPrecision()); diff --git a/inference-engine/src/low_precision_transformations/src/add.cpp b/inference-engine/src/low_precision_transformations/src/add.cpp index 4ecd8464370..55a101c101f 100644 --- a/inference-engine/src/low_precision_transformations/src/add.cpp +++ b/inference-engine/src/low_precision_transformations/src/add.cpp @@ -152,28 +152,25 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter newAddOrSubtract = newMultiply; } } else { - // dequantizations are on both branches + // low precision with dequantization operations on at least one branch const int emptyPathIndex = fullPathIndex == 0 ? 
1 : 0; - FakeQuantizeDequantization dequantizationEmptyPath = NetworkHelper::getDequantization(add, emptyPathIndex); - if (updatePrecisions && !dequantizationEmptyPath.empty() && !dequantizationEmptyPath.isLowPrecision()) { - return false; + if (updatePrecisions) { + const FakeQuantizeDequantization dequantizationEmptyPath = NetworkHelper::getDequantization(add, emptyPathIndex); + if (!dequantizationEmptyPath.empty() && !dequantizationEmptyPath.isLowPrecision()) { + return false; + } } - FakeQuantizeDequantization dequantizationFullPath = NetworkHelper::getDequantization(add, fullPathIndex); - if (updatePrecisions && !dequantizationFullPath.empty() && !dequantizationFullPath.isLowPrecision()) { - return false; - } - - dequantizationEmptyPath = NetworkHelper::foldDequantization(addNode, emptyPathIndex); + const FakeQuantizeDequantization dequantizationEmptyPath = NetworkHelper::foldDequantization(addNode, emptyPathIndex); std::shared_ptr subtractEmptyPathValues; std::shared_ptr multiplyEmptyPathValues; - std::tie(subtractEmptyPathValues, multiplyEmptyPathValues) = NetworkHelper::createEmptyValues(dequantizationEmptyPath); + std::tie(subtractEmptyPathValues, multiplyEmptyPathValues) = NetworkHelper::createEmptyValues(dequantizationEmptyPath, deqPrecision); - dequantizationFullPath = NetworkHelper::foldDequantization(addNode, fullPathIndex); + const FakeQuantizeDequantization dequantizationFullPath = NetworkHelper::foldDequantization(addNode, fullPathIndex); std::shared_ptr subtractFullPathValues; std::shared_ptr multiplyFullPathValues; - std::tie(subtractFullPathValues, multiplyFullPathValues) = NetworkHelper::createEmptyValues(dequantizationFullPath); + std::tie(subtractFullPathValues, multiplyFullPathValues) = NetworkHelper::createEmptyValues(dequantizationFullPath, deqPrecision); // calculation // before: Y = (SC1 * (X1 - SH1)) + (SC2 * (X2 - SH2)) @@ -196,11 +193,24 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter OutputVector inputs{ {}, {} }; auto fullPathInput = dequantizationFullPath.convert == nullptr ? dequantizationFullPath.data : dequantizationFullPath.convert; + // inputs[0] inputs[1] + // \ / + // \ / + // newAddOrSubtract + // | + // newMultiply + inputs[emptyPathIndex] = dequantizationEmptyPath.data; inputs[fullPathIndex] = std::make_shared( newSubtractFullPathValues == nullptr ? fullPathInput : - std::make_shared(fullPathInput, newSubtractFullPathValues), + std::make_shared( + // precision on branch with dequantization operations can be different with dequantization precision, + // for example: FP16 model with FP32 dequantization + fullPathInput.get_element_type() != newSubtractFullPathValues->get_element_type() ? 
+ std::make_shared(fullPathInput, newSubtractFullPathValues->get_element_type()) : + fullPathInput, + newSubtractFullPathValues), newMultiplyFullPathValues); newAddOrSubtract = std::make_shared>( diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp b/inference-engine/src/low_precision_transformations/src/concat.cpp index 0863dcb3f09..6adeb1f413c 100644 --- a/inference-engine/src/low_precision_transformations/src/concat.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat.cpp @@ -5,9 +5,7 @@ #include "low_precision/concat.hpp" #include -#include #include -#include #include #include @@ -189,7 +187,6 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context const auto outPShape = concat->get_output_partial_shape(0); const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, outPShape.rank()); - // TODO: LPT: to support current flow: #58269 if (normalizedAxis != 1ul) { return false; } @@ -198,8 +195,6 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context return false; } - const bool perTensorQuantizationIsRequired = normalizedAxis != 1ul; - element::Type precision; for (size_t i = 0ul; i < concat->get_input_size(); i++) { const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(concat, i); @@ -212,12 +207,6 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context } else if (precision != dequantization.data.get_element_type()) { return false; } - - if (perTensorQuantizationIsRequired && - (((dequantization.subtractConstant != nullptr) && !NetworkHelper::isScalarLike(dequantization.subtractConstant)) || - ((dequantization.multiplyConstant != nullptr) && !NetworkHelper::isScalarLike(dequantization.multiplyConstant)))) { - return false; - } } return true; } diff --git a/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp b/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp index b1b7674631b..54e87798a64 100644 --- a/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/eltwise_base_transformation.cpp @@ -54,15 +54,10 @@ bool EltwiseBaseTransformation::canBeTransformed(const TransformationContext& co return false; } + // at least one branch quantization is mandatory if ((dequantization1.data.get_node() == nullptr) || - (dequantization1.empty() && !is_type(dequantization1.data.get_node_shared_ptr()) && - !is_type(dequantization2.data.get_node_shared_ptr()))) { - return false; - } - - if ((dequantization2.data.get_node() == nullptr) || - (dequantization2.empty() && !is_type(dequantization2.data.get_node_shared_ptr()) && - !is_type(dequantization1.data.get_node_shared_ptr()))) { + (dequantization2.data.get_node() == nullptr) || + (dequantization1.empty() && dequantization2.empty())) { return false; } @@ -101,15 +96,39 @@ static bool isBranchHaveMultipleConsumers(const std::shared_ptr branchData // return branch index with FP32 precision after eltwise transformation int EltwiseBaseTransformation::getNotEmpty(const std::shared_ptr& eltwise) const { const FakeQuantizeDequantization dequantization1 = pass::low_precision::NetworkHelper::getDequantization(eltwise, 0ul); - if (dequantization1.empty() || as_type(dequantization1.data.get_node())) { + if (as_type(dequantization1.data.get_node())) { return -1; } const FakeQuantizeDequantization dequantization2 = 
pass::low_precision::NetworkHelper::getDequantization(eltwise, 1ul); - if (dequantization2.empty() || as_type(dequantization2.data.get_node())) { + if (as_type(dequantization2.data.get_node())) { return -1; } + if (!dequantization1.empty() && dequantization1.isLowPrecision() && (dequantization2.empty() || !dequantization2.isLowPrecision())) { + return 1; + } + + if ((dequantization1.empty() || !dequantization1.isLowPrecision()) && !dequantization2.empty() && dequantization2.isLowPrecision()) { + return 0; + } + + if (!updatePrecisions) { + // If result is still not defined, then handle special cases for updatePrecisions == false, assumption for one branch quantization: + // 1. branch with dequantization operations is quantized, + // 2. empty branch is not quantized. + // As result: move dequantization operations to empty branch. + // Note: keep comparisions uppper as is: low precision can be used in updatePrecisions == false case + // if FakeQuantize operations were decomposed before LPT. + if (!dequantization1.empty() && dequantization2.empty()) { + return 1; + } + + if (dequantization1.empty() || !dequantization2.empty()) { + return 0; + } + } + const std::shared_ptr fakeQuantize1 = as_type_ptr(dequantization1.data.get_node_shared_ptr()); const std::shared_ptr fakeQuantize2 = diff --git a/inference-engine/src/low_precision_transformations/src/multiply.cpp b/inference-engine/src/low_precision_transformations/src/multiply.cpp index d95fe2812c3..923f77a7d20 100644 --- a/inference-engine/src/low_precision_transformations/src/multiply.cpp +++ b/inference-engine/src/low_precision_transformations/src/multiply.cpp @@ -41,7 +41,7 @@ MultiplyTransformation::MultiplyTransformation(const Params& params) : EltwiseBa bool MultiplyTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { auto multiply = m.get_match_root(); - if (!LayerTransformation::canBeTransformed(context, multiply)) { + if (!canBeTransformed(context, multiply)) { return false; } @@ -116,7 +116,7 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p dequantizationEmptyPath = NetworkHelper::foldDequantization(multiply, emptyPathIndex); std::shared_ptr subtractValuesEmptyPath; std::shared_ptr multiplyValuesEmptyPath; - std::tie(subtractValuesEmptyPath, multiplyValuesEmptyPath) = NetworkHelper::createEmptyValues(dequantizationEmptyPath); + std::tie(subtractValuesEmptyPath, multiplyValuesEmptyPath) = NetworkHelper::createEmptyValues(dequantizationEmptyPath, deqPrecision); // check if empty path shifts are not zero if (!NetworkHelper::isZeroConst(subtractValuesEmptyPath)) { @@ -126,7 +126,7 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p dequantizationFullPath = NetworkHelper::foldDequantization(multiply, fullPathIndex); std::shared_ptr subtractValuesFullPath; std::shared_ptr multiplyValuesFullPath; - std::tie(subtractValuesFullPath, multiplyValuesFullPath) = NetworkHelper::createEmptyValues(dequantizationFullPath); + std::tie(subtractValuesFullPath, multiplyValuesFullPath) = NetworkHelper::createEmptyValues(dequantizationFullPath, deqPrecision); // before: Y = (SC1 * (X1 - SH1)) * (SC2 * X2) @@ -160,6 +160,24 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p return true; } +bool MultiplyTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const { + FakeQuantizeDequantization dequantization1 = pass::low_precision::NetworkHelper::getDequantization(layer, 0ul); + 
FakeQuantizeDequantization dequantization2 = pass::low_precision::NetworkHelper::getDequantization(layer, 1ul); + + if ((dequantization1.data.get_node() == nullptr) || + (dequantization1.empty() && !is_type(dequantization1.data.get_node_shared_ptr()) && + !is_type(dequantization2.data.get_node_shared_ptr()))) { + return false; + } + + if ((dequantization2.data.get_node() == nullptr) || + (dequantization2.empty() && !is_type(dequantization2.data.get_node_shared_ptr()) && + !is_type(dequantization1.data.get_node_shared_ptr()))) { + return false; + } + return EltwiseBaseTransformation::canBeTransformed(context, layer); +} + } // namespace low_precision } // namespace pass } // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp index 3f49e8b327c..879bd24dc04 100644 --- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp +++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp @@ -1268,7 +1268,7 @@ bool NetworkHelper::isQuantizeSupported(const std::shared_ptrget_levels()); } -FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_ptr& node, const size_t parentIndex, const bool inPlace) { +FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_ptr& node, const size_t parentIndex, const bool inPlace) { auto getDataIndex = [](const std::shared_ptr& node) { if (is_type(node->get_input_node_ptr(1))) { return 0ul; @@ -1285,7 +1285,7 @@ FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_pt return 1ul; }; - Output dataNode = inPlace ? node->output(0) : node->input_value(parentIndex); + Output dataNode = inPlace ? std::const_pointer_cast(node)->output(0) : node->input_value(parentIndex); const std::shared_ptr multiply = as_type_ptr(dataNode.get_node_shared_ptr()); std::shared_ptr multiplyConstant; @@ -1440,22 +1440,20 @@ std::shared_ptr NetworkHelper::normalizeDequantizationShape(co return normalizedConstant; } -FakeQuantizeDequantizationValues NetworkHelper::createEmptyValues(const FakeQuantizeDequantization& dequantization) { - std::shared_ptr parent = dequantization.convert ? dequantization.convert : dequantization.data.get_node_shared_ptr(); +FakeQuantizeDequantizationValues NetworkHelper::createEmptyValues(const FakeQuantizeDequantization& dequantization, const element::Type precision) { + const std::shared_ptr multiplyConstant = dequantization.multiply ? + dequantization.multiplyConstant->get_element_type() != precision ? + foldConvert(dequantization.multiplyConstant, precision) : + dequantization.multiplyConstant : + std::make_shared(precision, Shape({}), std::vector({ 1.f })); - std::shared_ptr multiply1Const = dequantization.multiply ? - dequantization.multiply->get_input_node_shared_ptr(1)->clone_with_new_inputs({}) : - std::make_shared(parent->get_output_element_type(0), Shape({}), std::vector({ 1.f })); + const std::shared_ptr subtractConstant = dequantization.subtract ? + dequantization.subtractConstant->get_element_type() != precision ? + foldConvert(dequantization.subtractConstant, precision) : + dequantization.subtractConstant : + std::make_shared(precision, Shape({}), std::vector({ 0.f })); - std::shared_ptr subtract1Const = dequantization.subtract ? - (dequantization.subtractConvert == nullptr ? 
- dequantization.subtractConstant->clone_with_new_inputs({}) : - foldConvert(dequantization.subtractConstant, dequantization.subtractConvert->get_element_type())) : - std::make_shared(parent->get_output_element_type(0), Shape({}), std::vector({ 0.f })); - - subtract1Const->set_output_type(0, multiply1Const->get_output_element_type(0), subtract1Const->get_output_partial_shape(0)); - - return FakeQuantizeDequantizationValues(subtract1Const, multiply1Const); + return FakeQuantizeDequantizationValues(subtractConstant, multiplyConstant); } bool NetworkHelper::isZeroConst(const std::shared_ptr& node) { diff --git a/inference-engine/src/low_precision_transformations/src/reshape.cpp b/inference-engine/src/low_precision_transformations/src/reshape.cpp index f478928537e..b94e62320e4 100644 --- a/inference-engine/src/low_precision_transformations/src/reshape.cpp +++ b/inference-engine/src/low_precision_transformations/src/reshape.cpp @@ -38,131 +38,80 @@ ReshapeTransformation::ReshapeTransformation(const Params& params) : LayerTransf } void reshapeDequantizationConstant(const std::shared_ptr& reshape) { + // Reshape dequantization operation Constant. + // 1. Calculate result dequantization Constant shape for broadcast based on original dequantization Constant shape and Reshape output. + // For example: dequantization shape {1, 3, 1, 1}, output Reshape shape {1, 12, 3, 3}, result for broadcast: {1, 3, 4, 1}, + // where '4' calculated for temporary broadcast before reshape. + // 2. Broadcast dequantization Constant, if channels are changed + // 3. Reshape and replace + auto replaceConstant = [](const std::shared_ptr& reshape, const std::shared_ptr& originalConstant) { + // reshape for element-wise constant is not required + auto constantShape = originalConstant->get_shape(); + if (shape_size(constantShape) == 1ul) { + if (!constantShape.empty()) { + const auto newConstant = NetworkHelper::toScalar(originalConstant); + replace_node(originalConstant, newConstant); + } + return; + } + + auto const reshapeInputRank = reshape->get_input_partial_shape(0).rank(); + assert(reshapeInputRank.is_static()); + if (constantShape.size() > 1ul) { + while (constantShape.size() < static_cast(reshapeInputRank.get_length())) { + constantShape.insert(constantShape.begin(), 1ul); + } + } + + const auto reshapeOutputPShape = reshape->output(0).get_partial_shape(); + const auto reshapeOutputRank = reshapeOutputPShape.rank(); + assert(reshapeOutputRank.is_static()); + assert(reshapeOutputRank.get_length() >= 2); + assert(reshapeOutputPShape[1].is_static()); + assert(static_cast(reshapeOutputPShape[1].get_length()) >= constantShape[1]); + assert(reshapeOutputPShape[1].get_length() % constantShape[1] == 0); + const size_t dimensionsToBroadcast = reshapeOutputPShape[1].get_length() / constantShape[1]; + if (dimensionsToBroadcast == 0ul) { + return; + } + + Shape newOperationConstantBroadcastedShape = originalConstant->output(0).get_shape(); + // add dimensions to broadcast values + if (newOperationConstantBroadcastedShape.size() == 2ul) { + newOperationConstantBroadcastedShape.push_back(dimensionsToBroadcast); + } else { + newOperationConstantBroadcastedShape[2] = dimensionsToBroadcast; + } + const std::shared_ptr broadcastedConstant = fold( + originalConstant, + std::make_shared( + element::i32, + Shape({ newOperationConstantBroadcastedShape.size() }), + newOperationConstantBroadcastedShape)); + + std::vector newReshapeConstValues(reshapeOutputRank.get_length(), 1ul); + newReshapeConstValues[1] = 
reshapeOutputPShape[1].get_length(); + const std::shared_ptr newReshapeConstant = std::make_shared( + element::i32, + Shape({ newReshapeConstValues.size() }), + newReshapeConstValues); + + const std::shared_ptr resultConstant = fold( + broadcastedConstant, + newReshapeConstant, + reshape->get_special_zero()); + + replace_node(originalConstant, resultConstant); + }; + const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(reshape, 0); - if (dequantization.multiplyConstant->get_shape().size() > 1ul) { - // Reshape Subtract or Multiply operation Constant. - // 1. modify reshape parameters to avoid reshape by spatial dimensions - // 2. broadcast element-wise constant if channels are changed - // 3. reshape element-wise constant with modified reshape parameters - auto replaceConstant = [](const std::shared_ptr& reshape, const std::shared_ptr& op) { - const size_t constantIndex = as_type(op->get_input_node_ptr(1)) ? 1 : 0; - const auto originalConstant = as_type_ptr(op->get_input_node_shared_ptr(constantIndex)); - const auto constantShape = originalConstant->get_shape(); - // reshape for element-wise constant is not required - if (shape_size(constantShape) == 1ul) { - if (constantShape.size() > 1ul) { - const Shape newConstShape = Shape(reshape->get_output_partial_shape(0).rank().get_length(), 1ul); - const auto newConstant = opset1::Constant::create( - originalConstant->get_element_type(), newConstShape, originalConstant->cast_vector()); - replace_node(op->get_input_node_shared_ptr(constantIndex), newConstant); - } + if (dequantization.subtract != nullptr) { + replaceConstant(reshape, dequantization.subtractConstant); + } - return; - } - - // simple broadcast operation Constant shape to shape on activations - auto newOperationConstantShape = constantShape; - auto const reshapeInputPShape = reshape->get_input_partial_shape(0); - PartialShape newOperationConstantBroadcastedShape(reshapeInputPShape); - newOperationConstantBroadcastedShape[0] = 1ul; - - if ((reshapeInputPShape.rank().get_length() - newOperationConstantShape.size()) == 1ul) { - newOperationConstantShape.insert(newOperationConstantShape.begin(), 1ul); - } - const std::shared_ptr newOperationConstant = std::make_shared( - op->input(constantIndex).get_element_type(), - newOperationConstantShape, - originalConstant->cast_vector()); - - // reshape -1 value handling - auto getOverallValue = [](const Shape& shape, const std::vector& reshapeValues, const bool specialZero) -> size_t { - size_t overallValue = shape_size(shape); - for (size_t i = 0; i < reshapeValues.size(); ++i) { - auto reshapeValue = reshapeValues[i]; - if ((reshapeValue == 1ul) || (reshapeValue == -1) || ((reshapeValue == 0ul) && !specialZero)) { - continue; - } - - if ((reshapeValue == 0ul) && specialZero) { - reshapeValue = shape[i]; - } - - overallValue = overallValue / reshapeValue; - } - return overallValue; - }; - - // modify reshape constant for element-wise constant reshape - // element-wise constant doesn't have spatial dimensions, as result we should remove spatial dimensions from reshape parameters - const std::vector reshapeConstValues = as_type_ptr(reshape->get_input_node_shared_ptr(1))->cast_vector(); - - size_t overallValue = 0; - for (size_t i = 0; i < reshapeConstValues.size(); ++i) { - if (reshapeConstValues[i] == -1) { - overallValue = getOverallValue( - reshapeInputPShape.to_shape(), - reshapeConstValues, - as_type_ptr(reshape)->get_special_zero()); - break; - } - } - - std::vector newReshapeConstValues(reshapeConstValues); - 
for (int i = static_cast(newReshapeConstValues.size() - 1); i >= 0; --i) { - if (static_cast(newOperationConstantShape.size()) <= i) { - // new dimension was added - newReshapeConstValues[i] = 1; - } else if (newOperationConstantShape[i] == 1ul) { - // keep the same - newReshapeConstValues[i] = 1; - } else if (newReshapeConstValues[i] == -1) { - // modified reshape parameters are different, but value instead '-1' has to be equal as original reshape - newReshapeConstValues[i] = overallValue; - } - } - - const std::shared_ptr newReshapeConstant = std::make_shared( - reshape->input(1).get_element_type(), - Shape({ newReshapeConstValues.size() }), - newReshapeConstValues); - - // if channels are different then broadcast spatial dimensions to reshape channels correctly - // limitation which has to be covered by canBeTransformed: - // 1. spatial dimensions have to be absent or equal to 1 after reshape - // 2. only second dimension can be changed - - const bool shouldBroadcast = (shape_size(newReshapeConstValues) != 1ul) && (reshapeConstValues[1] != 0) && - (((reshapeConstValues[1] != -1) && - (static_cast(newOperationConstantShape[1]) != reshapeConstValues[1])) || - ((reshapeConstValues[1] == -1) && - (newOperationConstantShape[1] != overallValue))); - - const std::shared_ptr broadcastedConstant = shouldBroadcast ? - fold( - newOperationConstant, - std::make_shared( - element::i32, - Shape({static_cast(newOperationConstantBroadcastedShape.rank().get_length())}), - // TODO: investigate behaviour - newOperationConstantBroadcastedShape.to_shape())) : - newOperationConstant; - - const std::shared_ptr resultConstant = fold( - broadcastedConstant, - newReshapeConstant, - reshape->get_special_zero()); - - replace_node(op->get_input_node_shared_ptr(constantIndex), resultConstant); - }; - - if (dequantization.subtract != nullptr) { - replaceConstant(reshape, dequantization.subtract); - } - - if (dequantization.multiply != nullptr) { - replaceConstant(reshape, dequantization.multiply); - } + if (dequantization.multiply != nullptr) { + replaceConstant(reshape, dequantization.multiplyConstant); } } @@ -186,7 +135,7 @@ bool ReshapeTransformation::isPrecisionPreserved(std::shared_ptr op) const return true; } -size_t getLastNotBroadcastedChannel(const Shape& shape) { +size_t getLastNotBroadcastedDimension(const Shape& shape) { for (int i = static_cast(shape.size()) - 1; i >= 0; --i) { if (shape[i] != 1ul) { return i; @@ -195,7 +144,7 @@ size_t getLastNotBroadcastedChannel(const Shape& shape) { return 0; } -size_t getFirstChangedChannel(const PartialShape& shape1, const PartialShape& shape2) { +size_t getFirstChangedDimension(const PartialShape& shape1, const PartialShape& shape2) { const size_t minSize = std::min(shape1.rank().get_length(), shape2.rank().get_length()); size_t i = 0; for (; i < minSize; ++i) { @@ -216,11 +165,15 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex return false; } - // TODO: LPT: to support current flow: #58269 - //if (((dequantization.subtractConstant != nullptr) && NetworkHelper::isScalarLike(dequantization.subtractConstant)) || - // ((dequantization.multiplyConstant != nullptr) && NetworkHelper::isScalarLike(dequantization.multiplyConstant))) { - // return true; - //} + if (((dequantization.subtract == nullptr) || NetworkHelper::isScalarLike(dequantization.subtractConstant)) && + ((dequantization.multiply == nullptr) || NetworkHelper::isScalarLike(dequantization.multiplyConstant))) { + return true; + } + + const PartialShape outputPShape = 
op->get_output_partial_shape(0); + if (outputPShape[1].is_dynamic()) { + return false; + } const Shape subtractShape = dequantization.subtract == nullptr ? Shape{} : dequantization.subtractConstant->get_shape(); Shape subtractShapeWithBatch = subtractShape; @@ -245,26 +198,23 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex multiplyShapeWithBatch.insert(multiplyShapeWithBatch.begin(), 1ul); } - const PartialShape outputPShape = op->get_output_partial_shape(0); - // if we have per-channel dq, dynamic shape, and "-1" reshape value - don't transform - if (outputPShape.is_dynamic() && (shape_size(subtractShape) > 1ul || shape_size(multiplyShape) > 1ul)) { - const auto reshapeConstant = as_type_ptr(op->get_input_node_shared_ptr(1))->cast_vector(); - if (std::any_of(reshapeConstant.cbegin(), reshapeConstant.cend(), [](const int value) { return value == -1; })) { - return false; - } + const size_t outputChannel = static_cast(outputPShape[1].get_length()); + if (!subtractShapeWithBatch.empty() && (outputChannel < subtractShapeWithBatch[1])) { + return false; + } + if (!multiplyShapeWithBatch.empty() && (outputChannel < multiplyShapeWithBatch[1])) { + return false; + } + + if (outputPShape.is_static() && + ((!subtractShapeWithBatch.empty() && ((outputChannel % subtractShapeWithBatch[1]) != 0)) || + (!multiplyShapeWithBatch.empty() && (outputChannel % multiplyShapeWithBatch[1] != 0)))) { + return false; } return canBeTransformed(subtractShapeWithBatch, multiplyShapeWithBatch, inputPShape, outputPShape); } -size_t getChannelVolume(const PartialShape& shape) { - size_t volume = 1ul; - for (int i = 2; i < shape.rank().get_length(); ++i) { - volume = volume * shape[i].get_length(); - } - return volume; -} - bool ReshapeTransformation::canBeTransformed( const ngraph::Shape& subtractShape, const ngraph::Shape& multiplyShape, @@ -277,68 +227,15 @@ bool ReshapeTransformation::canBeTransformed( return false; } - // TODO: story 38439 - if ((inputRank == 4ul) && (outputRank == 2ul)) { - auto checkSpatialDimensions = [](const Shape& dequantizationConstShape) { - for (size_t i = (dequantizationConstShape.size() - 2); i < dequantizationConstShape.size(); ++i) { - if (dequantizationConstShape[i] != 1ul) { - return false; - } - } - return true; - }; + const size_t lastNotBroadcastedDimension = std::max(getLastNotBroadcastedDimension(subtractShape), getLastNotBroadcastedDimension(multiplyShape)); + const size_t firstChangedDimension = getFirstChangedDimension(inputShape, outputShape); + // LPT supports channel on the second dimension natively <= reshape transformation supports more shapes for this case + if ((lastNotBroadcastedDimension == 1ul) && (firstChangedDimension == 1ul)) { + return true; + } - if (((subtractShape.size() >= 3ul) && (!checkSpatialDimensions(subtractShape))) || - ((multiplyShape.size() >= 3ul) && (!checkSpatialDimensions(multiplyShape)))) { - return false; - } - - if (inputRank > 1ul) { - if (inputShape[1].is_dynamic()) { - return false; - } - } else { - if (inputShape[0].is_dynamic()) { - return false; - } - } - - if (outputRank > 1ul) { - if (outputShape[1].is_dynamic()) { - return false; - } - } else { - if (outputShape[0].is_dynamic()) { - return false; - } - } - - // custom validation for Layout::NCHW => Layout::NC - const size_t inputChannelsCount = inputRank > 1ul ? inputShape[1].get_length() : inputShape[0].get_length(); - const size_t outputChannelsCount = outputRank > 1ul ? 
outputShape[1].get_length() : outputShape[0].get_length(); - for (size_t i = 2; i < inputRank; ++i) { - if (inputShape[i].is_dynamic()) { - return false; - } - } - - if ((inputShape[0] != outputShape[0]) || ((inputChannelsCount * getChannelVolume(inputShape)) != outputChannelsCount)) { - return false; - } - } else { - if (ngraph::shape_size(subtractShape) > 1 || ngraph::shape_size(multiplyShape) > 1) { - for (size_t i = 0; i < 2ul; ++i) { - if (inputShape[i] != outputShape[i]) { - return false; - } - } - } - - const size_t lastNotBroadcastedChannel = std::max(getLastNotBroadcastedChannel(subtractShape), getLastNotBroadcastedChannel(multiplyShape)); - const size_t firstChangedChannel = getFirstChangedChannel(inputShape, outputShape); - if (lastNotBroadcastedChannel >= firstChangedChannel) { - return false; - } + if (lastNotBroadcastedDimension >= firstChangedDimension) { + return false; } return true; diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp index e20fed518e4..cb786a8af36 100644 --- a/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp +++ b/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp @@ -161,8 +161,8 @@ void VariantWrapper::merge( resultSharedValue->preferablePrecisions.insert(sharedValue->preferablePrecisions.begin(), sharedValue->preferablePrecisions.end()); - const auto resultSize = abs(resultSharedValue->minInterval.high - resultSharedValue->minInterval.low); - const auto size = abs(sharedValue->minInterval.high - sharedValue->minInterval.low); + const auto resultSize = std::abs(resultSharedValue->minInterval.high - resultSharedValue->minInterval.low); + const auto size = std::abs(sharedValue->minInterval.high - sharedValue->minInterval.low); if (resultSize > size) { resultSharedValue->minInterval = sharedValue->minInterval; diff --git a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp new file mode 100644 index 00000000000..6041e1f3f7b --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp @@ -0,0 +1,247 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cpu_blocked_memory_desc.h" +#include "mkldnn_memory.h" +#include "utils/cpu_utils.hpp" + +using namespace MKLDNNPlugin; + +BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims) : MemoryDesc(dims, Blocked) , precision(prc) { + order.resize(dims.size()); + std::iota(order.begin(), order.end(), 0); + blockedDims = dims; + offsetPadding = 0; + offsetPaddingToData.resize(dims.size(), 0); + strides.resize(order.size()); + strides[strides.size() - 1] = 1; + for (size_t i = 2; i <= order.size(); i++) { + strides[strides.size() - i] = strides[strides.size() - (i - 1)] * blockedDims[blockedDims.size() - (i - 1)]; + } +} + +BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims, const std::vector& blockedDims, + const std::vector& order, size_t offsetPadding, const std::vector& offsetPaddingToData, + const std::vector& strides) : MemoryDesc(dims, Blocked), precision(prc) { + if (std::any_of(order.begin(), order.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + IE_THROW() << "BlockedMemoryDesc do not support undefined order."; + } + + if 
(std::any_of(blockedDims.begin() + dims.size(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + IE_THROW() << "BlockedMemoryDesc doesn't support undefined blockedDims."; + } + + this->order = order; + this->blockedDims = blockedDims; + this->offsetPadding = offsetPadding; + + if (offsetPaddingToData.empty() && !order.empty()) { + this->offsetPaddingToData.resize(order.size()); + this->offsetPaddingToData[order.size() - 1] = 0; + for (size_t i = 2; i <= order.size(); i++) { + this->offsetPaddingToData[order.size() - i] = 0; + } + } else { + this->offsetPaddingToData = offsetPaddingToData; + } + + if (strides.empty() && !order.empty()) { + if (std::any_of(this->blockedDims.begin(), this->blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + this->strides.resize(order.size(), Shape::UNDEFINED_DIM); + } else { + this->strides.resize(order.size()); + this->strides[order.size() - 1] = 1; + for (size_t i = 2; i <= order.size(); i++) { + this->strides[order.size() - i] = this->strides[order.size() - (i - 1)] * this->blockedDims[blockedDims.size() - (i - 1)]; + } + } + } else { + this->strides = strides; + } + + if (!everyone_is(this->order.size(), this->blockedDims.size(), this->offsetPaddingToData.size(), this->strides.size())) { + IE_THROW() << "Order, blocked dims, offset padding to data and strides must have equals size"; + } +} + +bool BlockedMemoryDesc::isDefined() const { + bool defined = true; + defined = defined && std::none_of(blockedDims.cbegin(), blockedDims.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); + defined = defined && std::none_of(strides.cbegin(), strides.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); + defined = defined && std::none_of(order.cbegin(), order.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); + defined = defined && std::none_of(offsetPaddingToData.cbegin(), offsetPaddingToData.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); + defined = defined && offsetPadding != Shape::UNDEFINED_DIM; + + return defined; +} + +bool BlockedMemoryDesc::isCompatible(const MemoryDesc& rhs) const { + const MemoryDesc* pRhs = &rhs; + if (auto blockingDesc = dynamic_cast(pRhs)) { + return isCompatible(*blockingDesc); + } else if (auto mkldnnDesc = dynamic_cast(pRhs)) { + return mkldnnDesc->isCompatible(*this); + } else { + return false; + } +} + +bool BlockedMemoryDesc::isCompatible(const BlockedMemoryDesc& rhs) const { + if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) + return false; + + if (!dimsEqualWeak(this->getBlockDims(), rhs.getBlockDims())) { + return false; + } + + if (!dimsEqualWeak(this->getOffsetPaddingToData(), rhs.getOffsetPaddingToData())) { + return false; + } + + // this check needed to avoid inserting unnecessary reorders if the memory is used in place and the batch size is equal to 1 + size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 
0 : + Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 + if (!dimsEqualWeak(this->getStrides(), rhs.getStrides(), skipAxis)) { + return false; + } + + if (!dimsEqualWeak(this->getOrder(), rhs.getOrder())) { + return false; + } + + return dimsEqualWeak(this->getOffsetPadding(), rhs.getOffsetPadding()); +} + +bool BlockedMemoryDesc::isCompatible(const MKLDNNMemoryDesc& rhs) const { + return rhs.isCompatible(*this); +} + +size_t BlockedMemoryDesc::getMemSizeImp() const { + int64_t e_size = getOffsetPadding() + 1; // size in bytes (from begin of data to last element) + for (int j = 0; j < getBlockDims().size(); j++) + e_size += (getBlockDims()[j] - 1) * getStrides()[j]; + + + e_size *= getPrecision() == InferenceEngine::Precision::BIN ? 1 : getPrecision().size(); + + return e_size; +} + +size_t BlockedMemoryDesc::getOffset(const InferenceEngine::SizeVector& v) const { + InferenceEngine::SizeVector off_v = v; + + size_t n_blocked_dims = order.size(); + if (blockedDims.size() != n_blocked_dims || strides.size() != n_blocked_dims) { + IE_THROW() << "Cannot calculate offset. Incorrect primitive descriptor!"; + } + InferenceEngine::SizeVector blockedShift(n_blocked_dims); + for (size_t i = 1; i <= n_blocked_dims; i++) { + blockedShift[n_blocked_dims - i] = off_v[order[n_blocked_dims - i]] % blockedDims[n_blocked_dims - i]; + off_v[order[n_blocked_dims - i]] /= blockedDims[n_blocked_dims - i]; + } + size_t offset = getOffsetPadding(); + for (size_t d = 0; d < n_blocked_dims; ++d) { + const size_t p = blockedShift[d] + getOffsetPaddingToData()[d]; + offset += p * strides[d]; + } + return offset; +} + +size_t BlockedMemoryDesc::getElementOffset(size_t elemNumber) const { + // TODO [DS]: rewrite to support dynamic shapes + auto& dims = shape.getStaticDims(); + size_t n_dims = dims.size(); + InferenceEngine::SizeVector pos(n_dims); + for (size_t rd = 1; rd <= n_dims; ++rd) { + const size_t d = n_dims - rd; + const size_t cur_dim = dims[d]; + pos[d] = elemNumber % cur_dim; + elemNumber /= cur_dim; + } + return getOffset(pos); +} + +bool BlockedMemoryDesc::hasLayoutType(LayoutType layoutType) const { + switch (layoutType) { + case LayoutType::ncsp: + return isPlainFormat(); + case LayoutType::nspc: + return isTailCFormat(); + case LayoutType::nCsp8c: + return isBlockedCFormat(8); + case LayoutType::nCsp16c: + return isBlockedCFormat(16); + default: + return false; + } +} + +bool BlockedMemoryDesc::isPlainFormat() const { + if (shape.getRank() != order.size()) { + return false; + } + for (size_t i = 0; i < order.size(); ++i) { + if (order[i] != i) { + return false; + } + } + return true; +} + +bool BlockedMemoryDesc::isBlockedCFormat(size_t blk_size) const { + if ((order.size() - shape.getRank()) != 1) { + return false; + } + for (size_t i = 0; i < order.size() - 1; ++i) { + if (order[i] != i) { + return false; + } + } + if (order.back() != 1) { + return false; + } + if (blockedDims.back() != blk_size) { + return false; + } + return true; +} + +bool BlockedMemoryDesc::isTailCFormat() const { + if (shape.getRank() < 3) { + return false; + } + if (shape.getRank() != order.size()) { + return false; + } + if (!std::is_sorted(order.begin(), --order.end())) { + return false; + } + if (order.back() != 1) { + return false; + } + return true; +} + +std::string BlockedMemoryDesc::serializeFormat() const { + std::stringstream result; + char startLetter = 'a'; + std::unordered_map mapAxisBlockSize; + for (size_t i = shape.getRank(); i < order.size(); ++i) { + mapAxisBlockSize.insert({order[i], 
blockedDims[i]}); + } + + for (size_t i = 0; i < shape.getRank(); ++i) { + char nextLetter = startLetter + order[i]; + if (mapAxisBlockSize.count(i)) { + nextLetter = toupper(nextLetter); + } + result << nextLetter; + } + + for (auto& item : mapAxisBlockSize) { + result << item.second << char(startLetter + item.first); + } + + return result.str(); +} diff --git a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h new file mode 100644 index 00000000000..2c5b8a7d53c --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h @@ -0,0 +1,100 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cpu_memory_desc.h" + +namespace MKLDNNPlugin { + +class MKLDNNMemoryDesc; + +class BlockedMemoryDesc : public MemoryDesc { +public: + BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims); + + BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims, const std::vector& blockedDims, + const std::vector& order, size_t offsetPadding = 0, const std::vector& offsetPaddingToData = {}, + const std::vector& strides = {}); + + MemoryDescPtr clone() const override { + return MKLDNNPlugin::make_unique(*this); + } + + bool isDefined() const override; + + bool isCompatible(const MemoryDesc& rhs) const override; + + bool isCompatible(const BlockedMemoryDesc& rhs) const; + + bool isCompatible(const MKLDNNMemoryDesc& rhs) const; + + InferenceEngine::Precision getPrecision() const override { + return precision; + } + + void setPrecision(InferenceEngine::Precision prc) override { + precision = std::move(prc); + } + + const std::vector& getBlockDims() const { + return blockedDims; + } + + /** + * @brief Returns the vector of order + * + * @return order + */ + const std::vector& getOrder() const { + return order; + } + + /** + * @brief Returns the per-dimension offset vector + * + * @return offsets + */ + const std::vector& getOffsetPaddingToData() const { + return offsetPaddingToData; + } + /** + * @brief Returns the offset to the current memory block + * + * @return offset + */ + size_t getOffsetPadding() const { + return offsetPadding; + } + + /** + * @brief Returns strides for each dimension + * + * @return strides + */ + const std::vector& getStrides() const { + return strides; + } + + bool hasLayoutType(LayoutType layoutType) const override; + + std::string serializeFormat() const override; + +private: + size_t getElementOffset(size_t elemNumber) const override; + size_t getMemSizeImp() const override; + size_t getOffset(const InferenceEngine::SizeVector& v) const; + bool isPlainFormat() const; + bool isBlockedCFormat(size_t blk_size) const; + bool isTailCFormat() const; + +private: + InferenceEngine::Precision precision; + std::vector blockedDims; + std::vector strides; + std::vector order; + std::vector offsetPaddingToData; + size_t offsetPadding; +}; +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h b/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h new file mode 100644 index 00000000000..31d2b4b2091 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h @@ -0,0 +1,110 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "cpu_shape.h" +#include "utils/general_utils.h" + +namespace MKLDNNPlugin { + +enum MemoryDescType { + Blocked, + Mkldnn +}; + +enum class LayoutType : 
unsigned { + nspc, // general per channels format + ncsp, // general planar + nCsp8c, // general channels blocked by 8 + nCsp16c // general channels blocked by 16 +}; + +class MemoryDesc { +public: + MemoryDescType getType() const { + return type; + } + + const Shape& getShape() const { + return shape; + } + + virtual ~MemoryDesc() = default; + + virtual InferenceEngine::Precision getPrecision() const = 0; + + virtual void setPrecision(InferenceEngine::Precision prc) = 0; + + virtual std::unique_ptr clone() const = 0; + + virtual bool isCompatible(const MemoryDesc& rhs) const = 0; + + // Checks that all dimensions, offsets, strides, etc are defined (!= UNDEFINED_DIM) + virtual bool isDefined() const = 0; + + virtual bool hasLayoutType(LayoutType layoutType) const = 0; + + virtual std::string serializeFormat() const = 0; + + /** + * @brief Get minimal required memory size in bytes. + * @return return minimal required memory size in bytes or UNDEFINED_SIZE in case undefined descriptor + */ + size_t getCurrentSize() const { + size_t retVal = UNDEFINED_SIZE; + if (isDefined()) { + retVal = getMemSizeImp(); + } + return retVal; + } + + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + T* as() { + T* casted = dynamic_cast(this); + if (!casted) + IE_THROW() << "Cannot dynamically cast MemoryDesc"; + return casted; + } + + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + const T* as() const { + const T* casted = dynamic_cast(this); + if (!casted) + IE_THROW() << "Cannot dynamically cast MemoryDesc"; + return casted; + } + + static constexpr size_t UNDEFINED_SIZE = std::numeric_limits::max(); + +protected: + MemoryDesc(const Shape& shape, MemoryDescType type) + : shape(shape), type(type) {} + + MemoryDesc(const std::vector& dims, MemoryDescType type) + : shape(dims), type(type) {} + + virtual size_t getMemSizeImp() const = 0; + + // Get offset to the n'th element. Returns physical index of the element by the logical one considering padding, layout, blocking etc. + virtual size_t getElementOffset(size_t elemNumber) const = 0; + + MemoryDescType type; + Shape shape; + + friend class BlobDumper; + // WA: optimizedNspc2Ncsp used getElementOffset inside implementation + friend class MKLDNNSplitNode; +}; + +using MemoryDescPtr = std::unique_ptr; +using MemoryDescConstPtr = std::unique_ptr; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp new file mode 100644 index 00000000000..cc04db7f26f --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp @@ -0,0 +1,395 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cpu_memory_desc.h" +#include "cpu_memory_desc_utils.h" +#include "mkldnn_memory.h" +#include "utils/general_utils.h" +#include "utils/cpu_utils.hpp" +#include +#include +#include +#include + +using namespace mkldnn; +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +namespace MKLDNNPlugin { + +/** + * Convert to BlockedDescriptor + * + * mkl: IOhw_4i16o4i dims {32, 64, 128, 128} + * strides // the order of outer dims is encoded here + * inner_blks 4 16 4 + * inner_idxs 1 0 1 + * + * IE tensor desc has more expressive ability. Any oneDNN blocked tensor can be covreted. + * How to convert into IE representation: + * 0. 
Detect a new_outer_order of outer_dims via descending strides. + * 1. IE strides : concatenate strides in new_outer_order and inner strides. + * 2. IE dims : concatenate outer dims in new_outer_order with auto padding and inner blocks + * 3. IE order : concatenate new_outer_order and inner_idxs + */ +BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MKLDNNMemoryDesc& inpDesc) { + mkldnn::memory::desc desc = inpDesc; + const auto dims = desc.dims(); + + if (desc.data.format_kind != dnnl_blocked) + IE_THROW() << "Conversion is not possible"; + + const auto &blk_desc = desc.data.format_desc.blocking; + + const size_t outer_ndims = dims.size(); + const size_t inner_ndims = blk_desc.inner_nblks; + const size_t total_ndims = outer_ndims + inner_ndims; + + // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} + std::vector inner_strides(inner_ndims, 1); + for (size_t i = 1; i < blk_desc.inner_nblks; i++) { + inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; + } + + // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} + std::vector total_block_per_dim(outer_ndims, 1); + for (int i = 0; i < inner_ndims; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + std::vector outer_block_dims(std::begin(dims), std::begin(dims) + outer_ndims); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + + // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} + std::vector outer_order(outer_ndims); + std::iota(outer_order.begin(), outer_order.end(), 0); + std::sort(outer_order.begin(), outer_order.end(), + [&blk_desc, &outer_block_dims] (size_t ind_l, size_t ind_r) { + return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || + (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); + + // IE blocked order + // [new_outer_order] U [inner_idxs] + SizeVector ie_blk_order(total_ndims, 0); + std::copy(outer_order.begin(), outer_order.end(), ie_blk_order.begin()); + std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, ie_blk_order.begin() + dims.size()); + + // IE blocked strides + // [outer_strides via new_outer_order] U [inner_strides] + SizeVector ie_blk_strides(total_ndims, 0); + std::copy(inner_strides.rbegin(), inner_strides.rend(), ie_blk_strides.rbegin()); + std::transform(outer_order.begin(), outer_order.end(), ie_blk_strides.begin(), + [&] (size_t i) { return blk_desc.strides[i]; }); + + // IE blocked dims + // [dims via new_outer_order with auto pad] U [inner_blk_dims] + SizeVector ie_blk_dims(total_ndims, 0); + std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, + ie_blk_dims.end() - blk_desc.inner_nblks); + std::transform(outer_order.begin(), outer_order.end(), ie_blk_dims.begin(), + [&] (size_t i) { return outer_block_dims[i]; }); + + // IE offset padded to data. Same as for oneDNN + SizeVector ie_blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; + size_t ie_blk_offset0 = desc.data.offset0; + + // TODO: The tensor desc implementation allow to specify offset_to_data for inner blocked dims. + // Which is not obvious behavior. It required offset_to_data.size == total_ndims, so will + // fill it with zero. 
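To make the assembly below easier to check, here is a minimal worked sketch of the values this helper should produce for the IOhw_4i16o4i example quoted at the top of the function; the numbers are derived from that example rather than taken from the patch itself:

    // Worked example (assumed input: dims {32, 64, 128, 128}, inner_blks {4, 16, 4}, inner_idxs {1, 0, 1}):
    //   outer_block_dims = {2, 4, 128, 128}               // dims divided by per-dim block sizes {16, 16, 1, 1}
    //   outer_order      = {1, 0, 2, 3}                    // I has the largest stride, then O, h, w
    //   ie_blk_order     = {1, 0, 2, 3, 1, 0, 1}           // outer_order followed by inner_idxs
    //   ie_blk_dims      = {4, 2, 128, 128, 4, 16, 4}      // outer dims taken in outer_order, then inner blocks
    //   ie_blk_strides   = {8388608, 4194304, 32768, 256, 64, 4, 1}

The same blockedDims/strides/order triple appears in the "Construct from IE::TensorDesc" comment further below, so the forward and reverse conversions can be cross-checked against each other.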
+ ie_blk_offset_to_data.insert(ie_blk_offset_to_data.end(), inner_ndims, 0); + + BlockedMemoryDesc res(MKLDNNMemory::convertToIePrec(desc.data_type()), SizeVector {begin(dims), end(dims)}, ie_blk_dims, + ie_blk_order, ie_blk_offset0, ie_blk_offset_to_data, ie_blk_strides); + return res; +} + + +InferenceEngine::TensorDesc MemoryDescUtils::convertToTensorDesc(const MemoryDesc& desc) { + if (auto blockingDesc = dynamic_cast(&desc)) { + return InferenceEngine::TensorDesc(blockingDesc->getPrecision(), blockingDesc->getShape().getStaticDims(), + {blockingDesc->getBlockDims(), blockingDesc->getOrder(), blockingDesc->getOffsetPadding(), + blockingDesc->getOffsetPaddingToData(), blockingDesc->getStrides()}); + } else if (auto mkldnnDesc = dynamic_cast(&desc)) { + auto blockingDesc = convertToBlockedDescriptor(*mkldnnDesc); + return InferenceEngine::TensorDesc(blockingDesc.getPrecision(), blockingDesc.getShape().getStaticDims(), + {blockingDesc.getBlockDims(), blockingDesc.getOrder(), blockingDesc.getOffsetPadding(), + blockingDesc.getOffsetPaddingToData(), blockingDesc.getStrides()}); + } + + IE_THROW() << "Cannot convert MemoryDesc to InferenceEngine::TensorDesc"; + + return InferenceEngine::TensorDesc(); +} + +MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const MemoryDesc& desc) { + if (MemoryDescType::Blocked == desc.getType()) { + return convertToMKLDNNMemoryDesc(*(desc.as())); + } else if (MemoryDescType::Mkldnn == desc.getType()) { + return *(desc.as()); + } else { + IE_THROW() << "Cannot convert MemoryDesc to MKLDNNMemoryDesc"; + } +} + +MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const BlockedMemoryDesc& desc) { + dnnl_memory_desc_t mkldnnDesc; + + // scalar case + if (desc.getShape().getRank() == 0) { + mkldnn::memory::desc convertedDesc; + convertedDesc.data.format_kind = dnnl_blocked; + convertedDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(desc.getPrecision())); + convertedDesc.data.ndims = 1; + convertedDesc.data.dims[0] = 1; + convertedDesc.data.padded_dims[0] = 1; + convertedDesc.data.format_desc.blocking.strides[0] = 1; + convertedDesc.data.padded_offsets[0] = 0; + convertedDesc.data.offset0 = desc.getOffsetPadding(); + return MKLDNNMemoryDesc(convertedDesc); + } + + auto dims = desc.getShape().getStaticDims(); + + auto ie_blkdDims = desc.getBlockDims(); + auto ie_order = desc.getOrder(); + auto ie_offsetsToData = desc.getOffsetPaddingToData(); + auto ie_strides = desc.getStrides(); + + size_t outer_ndims = dims.size(); + size_t inner_ndims = ie_order.size() - dims.size(); + + bool is_descending_strides = true; + for (int i = 1; i < ie_strides.size(); i++) { + is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); + } + + // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims + // and may be we can achieve correct "descending strides" form which allow conversion. 
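// A minimal illustration of the constraint checked below (example values are assumed,
// matching the 4i16o4i layout described elsewhere in this file):
//   ie_strides = {8388608, 4194304, 32768, 256, 64, 4, 1}  -> non-increasing, convertible
//   ie_strides = {4194304, 8388608, 32768, ...}            -> contains an increasing pair,
//                                                             rejected with "Unsupported case for conversion"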
+ if (!is_descending_strides) + IE_THROW() << "Unsupported case for conversion"; + + std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension + for (size_t i = 0; i < outer_ndims; i++) { + outer_order[ie_order[i]] = i; + } + bool outer_is_correct_permutation_of_n = + std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); + + if (!outer_is_correct_permutation_of_n) + IE_THROW() << "Unsupported case for conversion"; + + bool inner_block_are_dense = one_of(ie_strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted + for (int i = outer_ndims; i < ie_strides.size() - 1; i++) { + inner_block_are_dense &= (ie_strides[i] == ie_strides[i+1] * ie_blkdDims[i+1]); + } + + if (!inner_block_are_dense) + IE_THROW() << "Unsupported case for conversion"; + + bool inner_pad_offsets_is_zero = std::all_of(ie_offsetsToData.begin() + outer_ndims, ie_offsetsToData.end(), + [](size_t pad) { return pad == 0; }); + + if (!inner_pad_offsets_is_zero) + IE_THROW() << "Unsupported case for conversion"; + + // Fill general memory desc fields + mkldnnDesc.format_kind = dnnl_blocked; + mkldnnDesc.extra.flags = 0; + mkldnnDesc.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(desc.getPrecision())); + mkldnnDesc.ndims = dims.size(); + mkldnnDesc.offset0 = desc.getOffsetPadding(); + std::copy(dims.begin(), dims.end(), mkldnnDesc.dims); + std::copy(ie_offsetsToData.begin(), ie_offsetsToData.begin() + outer_ndims, mkldnnDesc.padded_offsets); + std::fill(mkldnnDesc.padded_dims, mkldnnDesc.padded_dims + outer_ndims, 1); + for (size_t i = 0; i < ie_order.size(); i++) { + auto idx = ie_order[i]; + mkldnnDesc.padded_dims[idx] *= ie_blkdDims[i]; + } + + // Fill blocking desc + auto &dnn_blk_desc = mkldnnDesc.format_desc.blocking; + dnn_blk_desc.inner_nblks = inner_ndims; + std::copy(ie_blkdDims.end() - inner_ndims, ie_blkdDims.end(), dnn_blk_desc.inner_blks); + std::copy(ie_order.end() - inner_ndims, ie_order.end(), dnn_blk_desc.inner_idxs); + for (size_t i = 0; i < outer_ndims; i++) { + dnn_blk_desc.strides[i] = ie_strides[outer_order[i]]; + } + + return MKLDNNMemoryDesc(mkldnnDesc); +} + + +/** + * Construct from IE::TensorDesc + * @param tDesc + * + * IE IOhw_4i16o4i dims(N) = {32, 64, 128, 128} + * blockedDims {4, 2, 128, 128, 4, 16, 4} // total dims(inner, outermost, auto blocked/padded). Generally sorted by strides. + * strides {8388608, 4194304, 32768, 256, 64, 4, 1} // strides for blockedDims, growing sequence + * order {1, 0, 2, 3, 1, 0, 1} // matching to original dims + * + * All vectors blockedDims/strides/order have same size equals total num of internal blocked dims(inner_dims + outer_dims) + * + * Tensor descriptor filing is not deterministic. It allows any permutation of index which keeps order of + * real dims spliting. + * for {1, 0, 2, 3, 1, 0, 1} we can swap elements [1] <=> [4] + * but not [0]<=>[4] because it breacke spliting original dims into internal blocked dims + * Normalization of representation: Make strides growing but keep layout same as original. Not all + * layout allow us to meet normalize form of tensor desc. + * + * Limitation of conversion first N elements of order should be permutation of [0,1,2 ... 
N] + */ +MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const InferenceEngine::TensorDesc& tDesc) { + mkldnn::memory::desc mkldnnDesc({}, mkldnn::memory::data_type::undef, mkldnn::memory::format_tag::undef); + auto dims = tDesc.getDims(); + + // TODO: implicit conversion of dims is no good... + if (tDesc.getLayout() == Layout::SCALAR) { + mkldnnDesc.data.format_kind = dnnl_blocked; + mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); + mkldnnDesc.data.ndims = 1; + mkldnnDesc.data.dims[0] = 1; + mkldnnDesc.data.padded_dims[0] = 1; + mkldnnDesc.data.format_desc.blocking.strides[0] = 1; + mkldnnDesc.data.padded_offsets[0] = 0; + mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); + return MKLDNNMemoryDesc(mkldnnDesc); + } + + if (tDesc.getLayout() == Layout::ANY) { + mkldnnDesc.data.format_kind = dnnl_format_kind_any; + mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); + mkldnnDesc.data.ndims = dims.size(); + std::copy(dims.begin(), dims.end(), mkldnnDesc.data.dims); + std::copy(dims.begin(), dims.end(), mkldnnDesc.data.padded_dims); + mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); + std::fill(mkldnnDesc.data.padded_offsets, mkldnnDesc.data.padded_offsets + dims.size(), 0); + return MKLDNNMemoryDesc(mkldnnDesc); + } + + auto ie_blkdDims = tDesc.getBlockingDesc().getBlockDims(); + auto ie_order = tDesc.getBlockingDesc().getOrder(); + auto ie_offsetsToData = tDesc.getBlockingDesc().getOffsetPaddingToData(); + auto ie_strides = tDesc.getBlockingDesc().getStrides(); + + size_t outer_ndims = dims.size(); + size_t inner_ndims = ie_order.size() - dims.size(); + + bool is_descending_strides = true; + for (int i = 1; i < ie_strides.size(); i++) { + is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); + } + + // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims + // and may be we can achieve correct "descending strides" form which allow conversion. 
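// Worked illustration of the outer_order bookkeeping a few lines below (values taken from
// the header example above; the malformed variant is hypothetical):
//   ie_order = {1, 0, 2, 3, 1, 0, 1}, outer_ndims = 4, sentinel = outer_ndims + 1 = 5
//   outer_order starts as {5, 5, 5, 5}; after outer_order[ie_order[i]] = i it becomes {1, 0, 2, 3},
//   no sentinel remains, so the first 4 entries of ie_order form a valid permutation of 0..3.
//   A malformed order such as {1, 1, 2, 3, ...} would leave outer_order = {5, 1, 2, 3};
//   the surviving sentinel marks dimension 0 as never referenced and the conversion is rejected.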
+ if (!is_descending_strides) + IE_THROW() << "Unsupported case for conversion"; + + std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension + for (size_t i = 0; i < outer_ndims; i++) { + outer_order[ie_order[i]] = i; + } + bool outer_is_correct_permutation_of_n = + std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); + + if (!outer_is_correct_permutation_of_n) + IE_THROW() << "Unsupported case for conversion"; + + bool inner_block_are_dense = one_of(ie_strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted + for (int i = outer_ndims; i < ie_strides.size() - 1; i++) { + inner_block_are_dense &= (ie_strides[i] == ie_strides[i+1] * ie_blkdDims[i+1]); + } + + if (!inner_block_are_dense) + IE_THROW() << "Unsupported case for conversion"; + + bool inner_pad_offsets_is_zero = std::all_of(ie_offsetsToData.begin() + outer_ndims, ie_offsetsToData.end(), + [](size_t pad) { return pad == 0; }); + + if (!inner_pad_offsets_is_zero) + IE_THROW() << "Unsupported case for conversion"; + + // Fill general memory desc fields + mkldnnDesc.data.format_kind = dnnl_blocked; + mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); + mkldnnDesc.data.ndims = dims.size(); + mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); + std::copy(dims.begin(), dims.end(), mkldnnDesc.data.dims); + std::copy(ie_offsetsToData.begin(), ie_offsetsToData.begin() + outer_ndims, mkldnnDesc.data.padded_offsets); + std::fill(mkldnnDesc.data.padded_dims, mkldnnDesc.data.padded_dims + outer_ndims, 1); + for (size_t i = 0; i < ie_order.size(); i++) { + auto idx = ie_order[i]; + mkldnnDesc.data.padded_dims[idx] *= ie_blkdDims[i]; + } + + // Fill blocking desc + auto &dnn_blk_desc = mkldnnDesc.data.format_desc.blocking; + dnn_blk_desc.inner_nblks = inner_ndims; + std::copy(ie_blkdDims.end() - inner_ndims, ie_blkdDims.end(), dnn_blk_desc.inner_blks); + std::copy(ie_order.end() - inner_ndims, ie_order.end(), dnn_blk_desc.inner_idxs); + for (size_t i = 0; i < outer_ndims; i++) { + dnn_blk_desc.strides[i] = ie_strides[outer_order[i]]; + } + + return MKLDNNMemoryDesc(mkldnnDesc); +} + +BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MemoryDesc &desc) { + if (desc.getType() == MemoryDescType::Blocked) { + return *(desc.as()); + } else if (desc.getType() == MemoryDescType::Mkldnn) { + return MemoryDescUtils::convertToBlockedDescriptor(*(desc.as())); + } else { + IE_THROW() << "Cannot convert to blocked memory descriptor. 
Unsupported memory desc type"; + } +} + +MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const MKLDNNMemoryDesc& desc) { + if (desc.getFormatKind() != dnnl_format_kind_t::dnnl_blocked) + IE_THROW() << "applyUndefinedOffset doesn't support not dnnl_blocked MKLDNNMemoryDesc"; + + mkldnn::memory::desc retDesc = desc; + retDesc.data.offset0 = Shape::UNDEFINED_DIM; + return MKLDNNPlugin::make_unique(retDesc); +} + +MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const BlockedMemoryDesc &desc) { + std::vector strides; + std::vector offsetPaddingToData; + + strides.resize(desc.getBlockDims().size(), Shape::UNDEFINED_DIM); + offsetPaddingToData.resize(desc.getBlockDims().size(), 0); + size_t offsetPadding = Shape::UNDEFINED_DIM; + + return MKLDNNPlugin::make_unique(desc.getPrecision(), desc.getShape().getDims(), desc.getBlockDims(), + desc.getOrder(), offsetPadding, offsetPaddingToData, strides); +} + +MemoryDescPtr MemoryDescUtils::resetOffset(const MemoryDesc* desc) { + if (MemoryDescType::Blocked == desc->getType()) { + auto blockedDesc = desc->as(); + return MKLDNNPlugin::make_unique(blockedDesc->getPrecision(), blockedDesc->getShape().getDims(), + blockedDesc->getBlockDims(), blockedDesc->getOrder()); + } else if (MemoryDescType::Mkldnn == desc->getType()) { + auto mkldnnDesc = desc->as(); + mkldnn::memory::desc retDesc = *mkldnnDesc; + retDesc.data.offset0 = 0; + return MKLDNNPlugin::make_unique(retDesc); + } else { + IE_THROW() << "resetOffset support Blocked and Mkldnn descpriptors only"; + } +} + +InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &mem) { + // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor + auto& memDesc = mem.GetDesc(); + InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); + + desc = InferenceEngine::TensorDesc(desc.getPrecision(), memDesc.getShape().getStaticDims(), desc.getBlockingDesc()); + return MKLDNNPlugin::isEmptyTensorDesc(desc) ? 
make_blob_with_precision(desc) : make_blob_with_precision(desc, mem.GetData()); +} + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h new file mode 100644 index 00000000000..5cc6b0fc103 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h @@ -0,0 +1,88 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace MKLDNNPlugin { +class MKLDNNMemoryDesc; +class BlockedMemoryDesc; +class MKLDNNMemory; + +class MemoryDescUtils { +public: + /** + * @brief Converts MemoryDesc to InferenceEngine::TensorDesc + * @param desc MemoryDesc to be converted + * @return converted InferenceEngine::TensorDesc + */ + static InferenceEngine::TensorDesc convertToTensorDesc(const MemoryDesc& desc); + + /** + * @brief Converts MemoryDesc to MKLDNNMemoryDesc + * @param desc MemoryDesc to be converted + * @return converted MKLDNNMemoryDesc + */ + static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const MemoryDesc& desc); + + /** + * @brief Converts BlockedMemoryDesc to MKLDNNMemoryDesc + * @param desc BlockedMemoryDesc to be converted + * @return converted MKLDNNMemoryDesc + */ + static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const BlockedMemoryDesc& desc); + + /** + * @brief Converts InferenceEngine::TensorDesc to MKLDNNMemoryDesc + * @param desc InferenceEngine::TensorDesc to be converted + * @return converted MKLDNNMemoryDesc + */ + static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const InferenceEngine::TensorDesc& desc); + + /** + * @brief Converts MemoryDesc to BlockedMemoryDesc + * @param desc MemoryDesc to be converted + * @return converted BlockedMemoryDesc + */ + static BlockedMemoryDesc convertToBlockedDescriptor(const MemoryDesc& desc); + + /** + * @brief Converts MKLDNNMemoryDesc to BlockedMemoryDesc + * @param desc MKLDNNMemoryDesc to be converted + * @return converted BlockedMemoryDesc + */ + static BlockedMemoryDesc convertToBlockedDescriptor(const MKLDNNMemoryDesc& inpDesc); + + /** + * @brief Creates MKLDNNMemoryDesc with offset0 of UNDEFINED_DIM size + * @param desc modifiable MKLDNNMemoryDesc + * @return pointer to MKLDNNMemoryDesc + */ + static MemoryDescPtr applyUndefinedOffset(const MKLDNNMemoryDesc& desc); + + /** + * @brief Creates BlockedMemoryDesc with offsetPadding, strides of UNDEFINED_DIM size and offsetPaddingToData of 0 size + * @param desc modifiable BlockedMemoryDesc + * @return pointer to BlockedMemoryDesc + */ + static MemoryDescPtr applyUndefinedOffset(const BlockedMemoryDesc& desc); + + /** + * @brief Creates MemoryDesc with offsetPadding of 0 size + * @param desc modifiable MemoryDesc + * @return pointer to MemoryDesc + */ + static MemoryDescPtr resetOffset(const MemoryDesc* desc); + + /** + * @brief Creates InferenceEngine::Blob from MKLDNNMemory + * @param desc MKLDNNMemory from which will be created InferenceEngine::Blob + * @return pointer to InferenceEngine::Blob + */ + static InferenceEngine::Blob::Ptr interpretAsBlob(const MKLDNNMemory& mem); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_shape.h b/inference-engine/src/mkldnn_plugin/cpu_shape.h new file mode 100644 index 00000000000..fd063c2dc18 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_shape.h @@ -0,0 +1,159 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "perf_count.h" 
+#include +#include +#include +#include +#include "mkldnn_dims.h" + +namespace MKLDNNPlugin { + +class Shape { +public: + Shape() = default; + + explicit Shape(const ngraph::PartialShape& shape) { + minDims = shape.get_min_shape(); + maxDims = shape.get_max_shape(); + type = shape.is_static() ? ShapeType::Static : ShapeType::Dynamic; + + initDims(); + } + + explicit Shape(const InferenceEngine::SizeVector& shape) { + minDims = shape; + maxDims = shape; + type = ShapeType::Static; + + initDims(); + } + + /** + * @brief + * for static shape + * maxDims = [2, 3, 4, 5] + * minDims = [2, 3, 4, 5] + * dims = [2, 3, 4, 5] + * @return return lower bound of shape = [2, 3, 4, 5] + * for dynamic shape + * maxDims = [6, 6, 6, 6] + * minDims = [1, 1, 1, 1] + * dims = [UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM] + * @return return lower bound of shape = [1, 1, 1, 1] + */ + const std::vector& getMinDims() const { + return minDims; + } + + /** + * @brief + * for static shape + * maxDims = [2, 3, 4, 5] + * minDims = [2, 3, 4, 5] + * dims = [2, 3, 4, 5] + * @return return upper bound of shape = [2, 3, 4, 5] + * for dynamic shape + * maxDims = [6, 6, 6, 6] + * minDims = [1, 1, 1, 1] + * dims = [UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM] + * @return return upper bound of shape = [6, 6, 6, 6] + */ + const std::vector& getMaxDims() const { + return maxDims; + } + + /** + * @brief return defined shape or throw exception for dynamic case + * @return return shape + */ + const std::vector& getStaticDims() const { + if (type != ShapeType::Static) { + IE_THROW() << "Cannot get dims for non static shape"; + } + + return minDims; + } + + /** + * @brief + * for static shape + * maxDims = [2, 3, 4, 5] + * minDims = [2, 3, 4, 5] + * dims = [2, 3, 4, 5] + * @return return defined shape = [2, 3, 4, 5] + * for dynamic shape + * maxDims = [2, 3, 6, 6] + * minDims = [2, 3, 1, 1] + * dims = [2, 3, UNDEFINED_DIM, UNDEFINED_DIM] + * @return return shape with defined and undefined dims = [2, 3, UNDEFINED_DIM, UNDEFINED_DIM] + */ + const std::vector& getDims() const { + return dims; + } + bool isStatic() const { + return type == ShapeType::Static; + } + + size_t getRank() const { + return minDims.size(); + } + + size_t getElementsCount() const { + if (type != ShapeType::Static) { + IE_THROW() << "Cannot get elements count for non static shape"; + } + + size_t size = 1; + + for (int i = 0; i < minDims.size(); i++) { + size *= minDims[i]; + } + + return size; + } + + ngraph::PartialShape toPartialShape() const { + std::vector nGraphDims; + nGraphDims.reserve(minDims.size()); + for (int i = 0; i < minDims.size(); i++) { + nGraphDims.emplace_back(minDims[i], maxDims[i]); + } + return ngraph::PartialShape(nGraphDims); + } + + bool operator == (const Shape& rhs) const { + return minDims == rhs.minDims && maxDims == rhs.maxDims; + } + + bool operator != (const Shape& rhs) const { + return !(*this == rhs); + } + + enum : size_t { + UNDEFINED_DIM = 0xffffffffffffffff + }; + +private: + void initDims() { + dims.resize(minDims.size()); + for (int i = 0; i < minDims.size(); i++) { + dims[i] = minDims[i] == maxDims[i] ? 
minDims[i] : UNDEFINED_DIM; + } + } + + enum class ShapeType { + Static, + Dynamic + } type {ShapeType::Static}; + + std::vector minDims; + std::vector maxDims; + std::vector dims; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_types.h b/inference-engine/src/mkldnn_plugin/cpu_types.h index e5bc8af0b5c..7c820c4db50 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_types.h +++ b/inference-engine/src/mkldnn_plugin/cpu_types.h @@ -16,6 +16,7 @@ enum Type { Deconvolution, Lrn, Pooling, + AdaptivePooling, FullyConnected, Softmax, Split, @@ -85,7 +86,9 @@ enum Type { ExperimentalDetectronPriorGridGenerator, ExperimentalDetectronGenerateProposalsSingleImage, ExtractImagePatches, - NonMaxSuppression + NonMaxSuppression, + MatrixNms, + MulticlassNms }; enum Algorithm { @@ -95,6 +98,10 @@ enum Algorithm { PoolingMax, PoolingAvg, + // Adaptive pooling algorithms + AdaptivePoolingMax, + AdaptivePoolingAvg, + // Convolution algorithms ConvolutionCommon, ConvolutionGrouped, diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp index 1415dc1ae95..34261b1ac87 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp @@ -6,7 +6,6 @@ #include "mkldnn_node.h" #include "mkldnn_extension_utils.h" #include -#include "utils/cpu_utils.hpp" using namespace mkldnn; namespace MKLDNNPlugin { @@ -29,7 +28,7 @@ const MKLDNNNodePtr MKLDNNEdge::getChild() const { } bool MKLDNNEdge::isUseExternalMemory() const { - return externalMemoryPtr; + return useExternalMemory; } bool MKLDNNEdge::isDropped() const { @@ -77,7 +76,7 @@ bool MKLDNNEdge::needReorder() { int inNumber = getInputNum(); bool in_place = inPlace(); bool childCanChangeMem = childSPD->getConfig().outConfs.empty(); - for (const auto conf : childSPD->getConfig().outConfs) { + for (const auto& conf : childSPD->getConfig().outConfs) { if (conf.inPlace == outNumber && outNumber >= 0) childCanChangeMem = true; } @@ -89,7 +88,7 @@ bool MKLDNNEdge::needReorder() { int outNumber = edge->getOutputNum(); if (childSPD->getConfig().outConfs.empty()) count++; - for (const auto conf : childSPD->getConfig().outConfs) { + for (const auto& conf : childSPD->getConfig().outConfs) { if (conf.inPlace == outNumber) count++; } @@ -114,7 +113,7 @@ bool MKLDNNEdge::needReorder() { outNumber >= 0 && outNumber < childSPD->getConfig().inConfs.size() && childSPD->getConfig().inConfs[outNumber].inPlace >= 0) canBeInPlaceConflicts = true; } - return canBeInPlaceConflicts || !MKLDNNExtensionUtils::initTensorsAreEqual(getInputDesc(), getOutputDesc()); + return canBeInPlaceConflicts || !getInputDesc().isCompatible(getOutputDesc()); } void MKLDNNEdge::reuse(MKLDNNMemoryPtr ptr) { @@ -124,35 +123,6 @@ void MKLDNNEdge::reuse(MKLDNNMemoryPtr ptr) { status = Status::Allocated; } -const InferenceEngine::TensorDesc& MKLDNNEdge::getInputDescRO() const { - return inputDesc; -} - -InferenceEngine::TensorDesc MKLDNNEdge::getInputDesc() { - if (inputDesc.getLayout() == InferenceEngine::Layout::ANY) { - inputDesc = getSpecifiedInputDesc({}); - } - return inputDesc; -} - -const InferenceEngine::TensorDesc& MKLDNNEdge::getOutputDescRO() const { - return outputDesc; -} - -InferenceEngine::TensorDesc MKLDNNEdge::getOutputDesc() { - if (outputDesc.getLayout() == InferenceEngine::Layout::ANY) { - outputDesc = getSpecifiedOutputDesc({}); - } - return outputDesc; -} - -InferenceEngine::TensorDesc MKLDNNEdge::getDesc() { - if 
(!MKLDNNExtensionUtils::initTensorsAreEqual(getInputDesc(), getOutputDesc())) - IE_THROW() << "Cannot get descriptor for edge: " << getParent()->getName() << "->" - << getChild()->getName(); - return getInputDesc(); -} - int MKLDNNEdge::getInputNum() const { return parent_port; } @@ -168,45 +138,29 @@ void MKLDNNEdge::allocate(const void* mem_ptr) { if (memoryPtr) IE_THROW() << "Unexpected behaviour: status == NeedAllocation but memory is already allocated."; - auto inputDesc = getInputDesc(); - auto outputDesc = getOutputDesc(); - if (!MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, inputDesc) || - (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 && - (inputDesc.getPrecision() != outputDesc.getPrecision() || - inputDesc.getBlockingDesc() != outputDesc.getBlockingDesc()))) - IE_THROW() << "Cannot allocate memory. Nodes have primitive descriptors with different formats."; - if (inputDesc.getLayout() == InferenceEngine::Layout::ANY) - IE_THROW() << "Cannot get input descriptor!"; + auto& inputDesc = getInputDesc(); + auto& outputDesc = getOutputDesc(); + if (!inputDesc.isDefined() || !outputDesc.isDefined()) + IE_THROW() << "Cannot allocate memory for undefined descriptors."; + if (!inputDesc.isCompatible(outputDesc)) + IE_THROW() << "Cannot allocate memory for incompatible descriptors."; auto parentPtr = getParent(); memoryPtr.reset(new MKLDNNMemory(parentPtr->getEngine())); - memoryPtr->Create(MKLDNNMemoryDesc(inputDesc), mem_ptr, false); // no pads zeroing + + memoryPtr->Create(inputDesc, mem_ptr, false); // no pads zeroing status = Status::Allocated; } -std::string MKLDNNEdge::name() { - auto tensorDescToStr = [](InferenceEngine::TensorDesc const & desc) { - std::string name = desc.getPrecision().name(); - - auto blockingDesc = desc.getBlockingDesc(); - auto dims = blockingDesc.getBlockDims(); - - if (!dims.empty()) { - name += "["; - for (size_t i = 1; i < dims.size(); ++i) { - name += std::to_string(dims[i - 1]) + ","; - } - name += std::to_string(dims.back()) + "]"; - } - - return name; - }; - +std::string MKLDNNEdge::name() const { auto parentPtr = getParent(); auto childPtr = getChild(); - return parentPtr->getName() + std::to_string(parent_port) + tensorDescToStr(getInputDesc()) - + "<->" + childPtr->getName() + std::to_string(child_port); + std::stringstream result; + + result << parentPtr->getName() << " port " << parent_port << " <-> " << childPtr->getName() << " port " << child_port; + + return result.str(); } void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) { @@ -221,7 +175,7 @@ void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) { auto ptr = weightsCache->findOrCreate(name(), alloc, false); memoryPtr = *ptr; - externalMemoryPtr = true; + useExternalMemory = true; status = Status::Allocated; } else { allocate(); @@ -242,10 +196,13 @@ void MKLDNNEdge::changeStatus(MKLDNNEdge::Status state) { status = state; } -const MKLDNNDims& MKLDNNEdge::getDims() { - if (!dims.ndims()) { - MKLDNNDims outDims; - MKLDNNDims inDims; +// TODO [DS]: remove while DynamicShapes migration +// TODO [DS]: How should we validate shape compatibility? +// TODO [DS]: Why do we allow uninitialized shape? 
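// Illustrative usage of the Shape semantics getShape() now returns (a sketch with assumed
// dimension values; see cpu_shape.h for the actual contract):
//   Shape staticShape(InferenceEngine::SizeVector{2, 3, 4, 5});
//   staticShape.isStatic();        // true
//   staticShape.getStaticDims();   // {2, 3, 4, 5}
//
//   Shape dynamicShape(ngraph::PartialShape{2, 3, ngraph::Dimension(1, 6), ngraph::Dimension(1, 6)});
//   dynamicShape.isStatic();       // false
//   dynamicShape.getDims();        // {2, 3, Shape::UNDEFINED_DIM, Shape::UNDEFINED_DIM}
//   dynamicShape.getStaticDims();  // throws: "Cannot get dims for non static shape"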
+const Shape& MKLDNNEdge::getShape() { + if (!shape.getRank()) { + Shape inShape; + Shape outShape; auto childPtr = getChild(); auto parentPtr = getParent(); @@ -254,8 +211,8 @@ const MKLDNNDims& MKLDNNEdge::getDims() { IE_THROW() << "Error cannot find input data for " << child.lock()->getName() << " from " << parent.lock()->getName(); } - if (inNum < childPtr->inDims.size()) { - outDims = childPtr->inDims[inNum]; + if (inNum < childPtr->inputShapes.size()) { + outShape = childPtr->inputShapes[inNum]; } int outNum = getInputNum(); @@ -263,84 +220,34 @@ const MKLDNNDims& MKLDNNEdge::getDims() { IE_THROW() << "Error cannot find output data for " << parent.lock()->getName() << " to " << child.lock()->getName(); } - if (outNum >= parentPtr->outDims.size()) + if (outNum >= parentPtr->outputShapes.size()) outNum = 0; - if (outNum < parentPtr->outDims.size()) { - inDims = parentPtr->outDims[outNum]; + if (outNum < parentPtr->outputShapes.size()) { + inShape = parentPtr->outputShapes[outNum]; } - if (inDims.ndims() && outDims.ndims() && inDims.ndims() != outDims.ndims() && inDims.size() != outDims.size()) + if (inShape.getRank() && outShape.getRank() && inShape.getRank() != outShape.getRank() && inShape.getElementsCount() != outShape.getElementsCount()) IE_THROW() << "Nodes " << getParent()->getName() << " and " << getChild()->getName() << " have incompatible dimensions!"; - if (outDims.ndims() != 0) { - dims = outDims; - } else if (inDims.ndims() != 0) { - dims = inDims; + if (outShape.getRank() != 0) { + shape = outShape; + } else if (inShape.getRank() != 0) { + shape = inShape; } else { - dims = MKLDNNDims({(size_t)1}); + shape = Shape(InferenceEngine::SizeVector({1})); } - if (!(outDims.ndims() == 0 && inDims.ndims() == 0) && !dims.ndims()) + if (!(outShape.getRank() == 0 && inShape.getRank() == 0) && !shape.getRank()) IE_THROW() << "Cannot detect right dims for nodes " << getParent()->getName() << " and " << getChild()->getName(); } - return dims; + + return shape; } -bool MKLDNNEdge::nodeCanChangeDesc(const MKLDNNNodePtr &node) const { - PrimitiveDescInfo * selectedPd = node->getSelectedPrimitiveDescriptor(); - if (selectedPd == nullptr) - IE_THROW() << "Primitive descriptor for node " << node->getName() << " is not selected."; - - for (auto &inputDesc : selectedPd->getConfig().inConfs) { - if (inputDesc.desc.getLayout() != InferenceEngine::Layout::ANY) { - return true; - } - } - - for (auto &outDesc : selectedPd->getConfig().outConfs) { - if (outDesc.desc.getLayout() != InferenceEngine::Layout::ANY) { - return true; - } - } - - MKLDNNDims inputDims; - for (size_t i = 0; i < node->getParentEdges().size(); i++) { - if (inputDims.size() == 1 && inputDims.ndims() == 0) { - inputDims = node->getParentEdgeAt(i)->getDims(); - continue; - } - - if (inputDims.ndims() != node->getParentEdgeAt(i)->getDims().ndims()) { - return true; - } - } - for (size_t i = 0; i < node->getChildEdges().size(); i++) { - if (inputDims.size() == 1 && inputDims.ndims() == 0) { - inputDims = node->getChildEdgeAt(i)->getDims(); - continue; - } - - if (inputDims.ndims() != node->getChildEdgeAt(i)->getDims().ndims()) { - return true; - } - } - - return false; -} - -/// In we have {any, any, any} -> {any} or {any} -> {any, any, any} or {any} -> {any} it means that -/// layer doesn't change memory format -/// We don't support {any, any, nchw} -> {any} -InferenceEngine::TensorDesc MKLDNNEdge::getSpecifiedInputDesc(std::map formats, size_t enterCountUp, size_t enterCountDown) { - InferenceEngine::TensorDesc inDesc; - - if 
(inputDesc.getLayout() != InferenceEngine::Layout::ANY) { - return inputDesc; - } - +const MemoryDesc& MKLDNNEdge::getInputDesc() const { auto parentPtr = getParent(); if (parentPtr->getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Primitive descriptor for node " << parentPtr->getName() << " is not selected."; @@ -349,248 +256,48 @@ InferenceEngine::TensorDesc MKLDNNEdge::getSpecifiedInputDesc(std::mapgetName() << "."; - if (inputIdx >= parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size()) + auto& outConfs = parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs; + if (outConfs.empty()) + IE_THROW() << "Node " << parentPtr->getName() << " has empty output config list."; + + if (inputIdx >= outConfs.size()) inputIdx = 0; - inDesc = parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc; - if (inDesc.getLayout() != InferenceEngine::Layout::ANY) { - return inDesc; - } - - bool isFormatChanging = nodeCanChangeDesc(parentPtr); - - if (!isFormatChanging && inputIdx < parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size() && - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIdx].desc.getLayout() != InferenceEngine::Layout::ANY) { - inDesc = parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIdx].desc; - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc = inDesc; - return inDesc; - } - - for (size_t i = 0; i < parentPtr->getChildEdges().size(); i++) { - auto childEdge = parentPtr->getChildEdgeAt(i); - auto child = childEdge->getChild(); - int childIdx = childEdge->getOutputNum(); - if (!child->getSelectedPrimitiveDescriptor() || childIdx < 0 || - childEdge->getDims().ndims() != getDims().ndims()) { - continue; - } - if (child->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size() <= childIdx) - childIdx = 0; - memory::format_tag childInDesc = MKLDNNMemoryDesc(child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[childIdx].desc).getFormat(); - if (childInDesc != memory::format_tag::any && childInDesc != memory::format_tag::undef) { - if (formats.find(childInDesc) == formats.end()) - formats[childInDesc] = 1; - else - formats[childInDesc] += 1; - continue; - } - if (nodeCanChangeDesc(child)) - continue; - - if (enterCountUp < 2) { - childInDesc = MKLDNNMemoryDesc(childEdge->getSpecifiedOutputDesc(formats, enterCountUp, ++enterCountDown)).getFormat(); - if (childInDesc != memory::format_tag::any && childInDesc != memory::format_tag::undef) { - if (formats.find(childInDesc) == formats.end()) - formats[childInDesc] = 1; - else - formats[childInDesc] += 1; - } - } - } - - if (!isFormatChanging) { - for (size_t i = 0; i < parentPtr->getParentEdges().size(); i++) { - auto parentEdge = parentPtr->getParentEdgeAt(i); - auto parent = parentEdge->getParent(); - int parentIdx = parentEdge->getInputNum(); - if (!parent->getSelectedPrimitiveDescriptor() || parentIdx < 0 || - parentEdge->getDims().ndims() != getDims().ndims()) { - continue; - } - if (parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() <= parentIdx) { - parentIdx = 0; - } - memory::format_tag parentOutDesc = MKLDNNMemoryDesc(parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[parentIdx].desc).getFormat(); - if (parentOutDesc != memory::format_tag::any && parentOutDesc != memory::format_tag::undef) { - if (formats.find(parentOutDesc) == formats.end()) - formats[parentOutDesc] = 1; - else - formats[parentOutDesc] += 1; - continue; - } - if 
(nodeCanChangeDesc(parent)) - continue; - - if (enterCountUp < 2) { - parentOutDesc = MKLDNNMemoryDesc(parentEdge->getSpecifiedInputDesc(formats, ++enterCountUp, enterCountDown)).getFormat(); - if (parentOutDesc != memory::format_tag::any && parentOutDesc != memory::format_tag::undef) { - if (formats.find(parentOutDesc) == formats.end()) - formats[parentOutDesc] = 1; - else - formats[parentOutDesc] += 1; - } - } - } - } - - size_t maxFormatCount = 0; - memory::format_tag desc = MKLDNNMemory::GetPlainFormat(getDims()); - for (auto &it : formats) { - if (maxFormatCount < it.second && MKLDNNMemory::isConsistant(getDims(), it.first)) { - maxFormatCount = it.second; - desc = it.first; - } - } - - auto inDataType = MKLDNNMemoryDesc(parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc).getDataType(); - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc = MKLDNNMemoryDesc(getDims(), inDataType, desc); - if (!isFormatChanging && inputIdx < parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size() && - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIdx].desc.getLayout() == InferenceEngine::Layout::ANY) { - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[inputIdx].desc = - MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(getDims(), inDataType, desc)); - } - - return MKLDNNMemoryDesc(getDims(), inDataType, desc); + return *(outConfs[inputIdx].desc); } -InferenceEngine::TensorDesc MKLDNNEdge::getSpecifiedOutputDesc(std::map formats, size_t enterCountUp, size_t enterCountDown) { - InferenceEngine::TensorDesc outDesc; - - if (outputDesc.getLayout() != InferenceEngine::Layout::ANY) { - return outputDesc; - } - +const MemoryDesc& MKLDNNEdge::getOutputDesc() const { auto childPtr = getChild(); - auto parentPtr = getParent(); if (childPtr->getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Primitive descriptor for node " << childPtr->getName() << " is not selected."; int outputIdx = getOutputNum(); - int inputIdx = getInputNum(); if (outputIdx < 0) { IE_THROW() << "Edge cannot be found for node" << childPtr->getName() << "."; } - if (outputIdx >= childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size()) + auto& inConfs = childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs; + if (inConfs.empty()) + IE_THROW() << "Node " << childPtr->getName() << " has empty input config list."; + + if (outputIdx >= inConfs.size()) outputIdx = 0; - outDesc = childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc; - if (outDesc.getLayout() != InferenceEngine::Layout::ANY) { - return outDesc; - } + return *(inConfs[outputIdx].desc); +} - if (inputIdx >= parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size()) - inputIdx = 0; +const MemoryDesc& MKLDNNEdge::getDesc() const { + if (!getInputDesc().isCompatible(getOutputDesc())) + IE_THROW() << "Cannot get descriptor for edge: " << getParent()->getName() << "->" + << getChild()->getName(); - bool isFormatChanging = nodeCanChangeDesc(childPtr); - - if ((!isFormatChanging && outputIdx < childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() && - childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc.getLayout() != InferenceEngine::Layout::ANY) || - (isFormatChanging && inputIdx >= 0 && - parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc.getLayout() != InferenceEngine::Layout::ANY)) { - auto inputDataType = 
childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc.getPrecision(); - if (!isFormatChanging) - outDesc = childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc; - else - outDesc = parentPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[inputIdx].desc; - childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc = InferenceEngine::TensorDesc(inputDataType, getDims().ToSizeVector(), - {outDesc.getBlockingDesc().getBlockDims(), - outDesc.getBlockingDesc().getOrder()}); - return childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc; - } - - for (size_t i = 0; i < childPtr->getParentEdges().size(); i++) { - auto parentEdge = childPtr->getParentEdgeAt(i); - auto parent = parentEdge->getParent(); - int parentIdx = parentEdge->getInputNum(); - if (!parent->getSelectedPrimitiveDescriptor() || parentIdx < 0 || - parentEdge->getDims().ndims() != getDims().ndims()) { - continue; - } - if (parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() <= parentIdx) { - parentIdx = 0; - } - memory::format_tag parentOutDesc = MKLDNNMemoryDesc(parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[parentIdx].desc).getFormat(); - if (parentOutDesc != memory::format_tag::any && parentOutDesc != memory::format_tag::undef) { - if (formats.find(parentOutDesc) == formats.end()) - formats[parentOutDesc] = 1; - else - formats[parentOutDesc] += 1; - continue; - } - if (nodeCanChangeDesc(parent)) - continue; - - if (enterCountDown < 2) { - parentOutDesc = MKLDNNMemoryDesc(parentEdge->getSpecifiedInputDesc(formats, ++enterCountUp, enterCountDown)).getFormat(); - if (parentOutDesc != memory::format_tag::any && parentOutDesc != memory::format_tag::undef) { - if (formats.find(parentOutDesc) == formats.end()) - formats[parentOutDesc] = 1; - else - formats[parentOutDesc] += 1; - } - } - } - - if (!isFormatChanging) { - for (size_t i = 0; i < childPtr->getChildEdges().size(); i++) { - auto childEdge = childPtr->getChildEdgeAt(i); - auto child = childEdge->getChild(); - int childIdx = childEdge->getOutputNum(); - if (!child->getSelectedPrimitiveDescriptor() || childIdx < 0 || - childEdge->getDims().ndims() != getDims().ndims()) { - continue; - } - if (child->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size() <= childIdx) { - childIdx = 0; - } - memory::format_tag childInDesc = MKLDNNMemoryDesc(child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[childIdx].desc).getFormat(); - if (childInDesc != memory::format_tag::any && childInDesc != memory::format_tag::undef) { - if (formats.find(childInDesc) == formats.end()) - formats[childInDesc] = 1; - else - formats[childInDesc] += 1; - continue; - } - if (nodeCanChangeDesc(child)) - continue; - - if (enterCountDown < 2) { - childInDesc = MKLDNNMemoryDesc(childEdge->getSpecifiedOutputDesc(formats, enterCountUp, ++enterCountDown)).getFormat(); - if (childInDesc != memory::format_tag::any && childInDesc != memory::format_tag::undef) { - if (formats.find(childInDesc) == formats.end()) - formats[childInDesc] = 1; - else - formats[childInDesc] += 1; - } - } - } - } - - size_t maxFormatCount = 0; - memory::format_tag format = MKLDNNMemory::GetPlainFormat(getDims()); - for (auto &it : formats) { - if (maxFormatCount < it.second && MKLDNNMemory::isConsistant(getDims(), it.first)) { - maxFormatCount = it.second; - format = it.first; - } - } - - auto inDataType = 
MKLDNNMemoryDesc(childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[getOutputNum()].desc).getDataType(); - childPtr->getSelectedPrimitiveDescriptor()->getConfig().inConfs[outputIdx].desc = MKLDNNMemoryDesc(getDims(), inDataType, format); - if (!isFormatChanging && outputIdx < childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() && - childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc.getLayout() == InferenceEngine::Layout::ANY) { - childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc = - MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(getDims(), inDataType, format)); - } - - return childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc; + return getInputDesc(); } const MKLDNNMemory &MKLDNNEdge::getMemory() { if (status == Status::NotAllocated) { memoryPtr.reset(new MKLDNNMemory(getParent()->getEngine())); - memoryPtr->Create(MKLDNNMemoryDesc(getDesc()), getSharedEdge()->getMemoryPtr()->GetData()); + memoryPtr->Create(getDesc(), getSharedEdge()->getMemoryPtr()->GetData()); memoryFromEdge.reset(); changeStatus(Status::Allocated); } @@ -601,7 +308,7 @@ const MKLDNNMemory &MKLDNNEdge::getMemory() { MKLDNNMemoryPtr &MKLDNNEdge::getMemoryPtr() { if (status == Status::NotAllocated) { memoryPtr.reset(new MKLDNNMemory(getParent()->getEngine())); - memoryPtr->Create(MKLDNNMemoryDesc(getDesc()), getSharedEdge()->getMemoryPtr()->GetData()); + memoryPtr->Create(getDesc(), getSharedEdge()->getMemoryPtr()->GetData()); memoryFromEdge.reset(); changeStatus(Status::Allocated); } @@ -609,19 +316,6 @@ MKLDNNMemoryPtr &MKLDNNEdge::getMemoryPtr() { return memoryPtr; } -InferenceEngine::Blob::Ptr MKLDNNEdge::getBlob() { - if (!memoryPtr) - IE_THROW() << "Cannot get blob! Edge isn't initialized."; - InferenceEngine::TensorDesc desc = getDesc(); - - if (desc.getLayout() == InferenceEngine::Layout::ANY) - desc = InferenceEngine::TensorDesc(desc.getPrecision(), dims.ToSizeVector(), desc.getLayout()); - else - desc = InferenceEngine::TensorDesc(desc.getPrecision(), dims.ToSizeVector(), desc.getBlockingDesc()); - - return isEmptyTensorDesc(desc) ? make_blob_with_precision(desc) : make_blob_with_precision(desc, memoryPtr->GetData()); -} - void MKLDNNEdge::sharedMemFrom(const MKLDNNEdgePtr &edge) { memoryFromEdge = edge; status = Status::NotAllocated; @@ -633,7 +327,7 @@ void MKLDNNEdge::validate() { getMemory(); getParent(); getChild(); - getDims(); + getShape(); if (status != Status::Allocated) { IE_THROW() << "Error memory is not allocated!"; @@ -644,8 +338,7 @@ void MKLDNNEdge::validate() { MKLDNNEdgePtr MKLDNNEdge::getSharedEdge() const { auto memoryFromEdgePtr = memoryFromEdge.lock(); if (!memoryFromEdgePtr) { - IE_THROW() << "Cannot get memory ptr for edge(" << getParent()->getName() << "->" - << getChild()->getName() << "). The pointer on the edge with memory is empty!"; + IE_THROW() << "Cannot get memory ptr for edge( " << name() << " ). 
The pointer on the edge with memory is empty!"; } return memoryFromEdgePtr; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h index 63e2a16414d..5e6f4d23542 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h @@ -5,11 +5,9 @@ #pragma once #include -#include -#include "mkldnn_memory.h" -#include "mkldnn_dims.h" +#include "cpu_shape.h" +#include "cpu_memory_desc.h" #include "mkldnn_weights_cache.hpp" -#include "mkldnn/ie_mkldnn.h" #include #include @@ -53,10 +51,7 @@ public: const std::shared_ptr getParent() const; const std::shared_ptr getChild() const; - InferenceEngine::Blob::Ptr getBlob(); - InferenceEngine::TensorDesc getDesc(); - - const MKLDNNDims &getDims(); + const Shape &getShape(); const MKLDNNMemory& getMemory(); MKLDNNMemoryPtr& getMemoryPtr(); @@ -73,34 +68,23 @@ public: MKLDNNEdgePtr getSharedEdge() const; MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const; - const InferenceEngine::TensorDesc& getInputDescRO() const; - const InferenceEngine::TensorDesc& getOutputDescRO() const; - private: - std::string name(); + std::string name() const; std::weak_ptr parent; std::weak_ptr child; int parent_port; int child_port; - bool externalMemoryPtr = false; + bool useExternalMemory = false; MKLDNNEdgeWeakPtr memoryFromEdge; - MKLDNNDims dims; + Shape shape; MKLDNNMemoryPtr memoryPtr; Status status = Status::Uninitialized; - InferenceEngine::TensorDesc getInputDesc(); - InferenceEngine::TensorDesc getOutputDesc(); - InferenceEngine::TensorDesc getSpecifiedInputDesc(std::map formats, - size_t enterCountUp = 1, size_t enterCountDown = 0); - InferenceEngine::TensorDesc getSpecifiedOutputDesc(std::map formats, - size_t enterCountUp = 0, size_t enterCountDown = 1); - - InferenceEngine::TensorDesc inputDesc; - InferenceEngine::TensorDesc outputDesc; - - bool nodeCanChangeDesc(const std::shared_ptr& node) const; + const MemoryDesc& getInputDesc() const; + const MemoryDesc& getOutputDesc() const; + const MemoryDesc& getDesc() const; enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2, LOOK_BOTH = LOOK_UP | LOOK_DOWN, LOOK_NO_RECURRENT = 4 }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp index 2d7d4e5e6b6..d1c851645b1 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp @@ -32,7 +32,7 @@ uint8_t MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type dataType) } } -memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision prec) { +memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(const InferenceEngine::Precision& prec) { switch (prec) { case InferenceEngine::Precision::FP32: return memory::data_type::f32; @@ -47,6 +47,8 @@ memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::P return memory::data_type::u8; case InferenceEngine::Precision::BIN: return memory::data_type::bin; + case InferenceEngine::Precision::UNSPECIFIED: + return memory::data_type::undef; default: { IE_THROW() << "The plugin does not support " << prec.name(); } @@ -67,155 +69,18 @@ InferenceEngine::Precision MKLDNNExtensionUtils::DataTypeToIEPrecision(memory::d return InferenceEngine::Precision::U8; case memory::data_type::bin: return InferenceEngine::Precision::BIN; + case memory::data_type::undef: + return InferenceEngine::Precision::UNSPECIFIED; default: { 
IE_THROW() << "Unsupported data type."; } } } -InferenceEngine::TensorDesc MKLDNNExtensionUtils::getUninitTensorDesc(const InferenceEngine::TensorDesc &desc) { - std::vector notInitArr; - std::vector zeroArr; - for (size_t i = 0; i < desc.getBlockingDesc().getBlockDims().size(); i++) { - notInitArr.push_back(std::numeric_limits::max()); - zeroArr.push_back(0); - } - // MKLDNN doesn't support offset_padding_to_data[i] != 0 (assert(src_d_blk.offset_padding_to_data[d] == 0);) - return desc.getLayout() == InferenceEngine::Layout::ANY ? desc : - InferenceEngine::TensorDesc(desc.getPrecision(), desc.getDims(), - {desc.getBlockingDesc().getBlockDims(), desc.getBlockingDesc().getOrder(), - std::numeric_limits::max(), zeroArr, notInitArr}); +InferenceEngine::SizeVector MKLDNNExtensionUtils::convertToSizeVector(const mkldnn::memory::dims& dims) { + return InferenceEngine::SizeVector(dims.begin(), dims.end()); } -bool MKLDNNExtensionUtils::initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2) { - if (desc1.getDims() != desc2.getDims() || desc1.getPrecision() != desc2.getPrecision()) - return false; - if (desc1.getLayout() == InferenceEngine::Layout::SCALAR && desc2.getLayout() == InferenceEngine::Layout::SCALAR) - return true; - if (desc1.getLayout() == InferenceEngine::Layout::ANY || desc2.getLayout() == InferenceEngine::Layout::ANY) - return true; - bool batch1 = desc1.getDims()[0] == 1; - const auto& in1Block = desc1.getBlockingDesc(); - const auto& in2Block = desc2.getBlockingDesc(); - size_t uninitNum = std::numeric_limits::max(); - if (in1Block.getBlockDims().size() != in2Block.getBlockDims().size()) - return false; - for (size_t i = 0; i < in1Block.getBlockDims().size(); i++) { - if (in1Block.getBlockDims()[i] != in2Block.getBlockDims()[i] && - in1Block.getBlockDims()[i] != uninitNum && in2Block.getBlockDims()[i] != uninitNum) - return false; - if (in1Block.getOffsetPaddingToData()[i] != in2Block.getOffsetPaddingToData()[i] && - in1Block.getOffsetPaddingToData()[i] != uninitNum && in2Block.getOffsetPaddingToData()[i] != uninitNum) - return false; - if (i >= batch1 && in1Block.getStrides()[i] != in2Block.getStrides()[i] && - in1Block.getStrides()[i] != uninitNum && in2Block.getStrides()[i] != uninitNum) - return false; - if (in1Block.getOrder()[i] != in2Block.getOrder()[i] && - in1Block.getOrder()[i] != uninitNum && in2Block.getOrder()[i] != uninitNum) - return false; - } - return !(in1Block.getOffsetPadding() != in2Block.getOffsetPadding() && - in1Block.getOffsetPadding() != uninitNum && in2Block.getOffsetPadding() != uninitNum); -} - -PartialBlkDesc PartialBlkDesc::makePlain(const InferenceEngine::SizeVector &dims) { - PartialBlkDesc res; - res.outer_order.resize(dims.size()); - std::iota(res.outer_order.begin(), res.outer_order.end(), 0); - return res; -} - -PartialBlkDesc PartialBlkDesc::makeCBlocked(const InferenceEngine::SizeVector &dims, size_t block_size) { - PartialBlkDesc res; - res.outer_order.resize(dims.size()); - std::iota(res.outer_order.begin(), res.outer_order.end(), 0); - res.inner_blk_size = {block_size}; - res.inner_blk_idxes = {1}; - return res; -} - - -PartialBlkDesc PartialBlkDesc::makeTailC(const InferenceEngine::SizeVector &dims) { - PartialBlkDesc res = makePlain(dims); - if (dims.size() > 2) { - auto itr = res.outer_order.begin() + 1; - std::rotate(itr, itr + 1, res.outer_order.end()); - } - return res; -} - -PartialBlkDesc PartialBlkDesc::extractFrom(const InferenceEngine::TensorDesc &desc) { - if (desc.getLayout() == 
InferenceEngine::ANY) - IE_THROW() << "Cannot extract partial blocked descriptor for `ANY` layout"; - - const auto &dims = desc.getDims(); - const auto &blk = desc.getBlockingDesc(); - const auto &blk_dims = blk.getBlockDims(); - const auto &blk_order = blk.getOrder(); - - PartialBlkDesc res; - res.outer_order = {blk_order.begin(), blk_order.begin() + dims.size()}; - res.inner_blk_idxes = {blk_order.begin() + dims.size(), blk_order.end()}; - res.inner_blk_size = {blk_dims.begin() + dims.size(), blk_dims.end()}; - - return res; -} - -bool PartialBlkDesc::isAutoExtendedWith(const InferenceEngine::SizeVector &dims) const { - auto tmp_dims = dims; - for (int i = 0; i < inner_blk_size.size(); i++) { - auto idx = inner_blk_idxes[i]; - auto blk = inner_blk_size[i]; - if (tmp_dims[idx] % blk == 0) - tmp_dims[idx] /= blk; - else - return true; - } - return false; -} - -bool PartialBlkDesc::operator == (const PartialBlkDesc& it) const { - return std::tie(this->inner_blk_idxes, - this->inner_blk_size, - this->outer_order) == - std::tie(it.inner_blk_idxes, - it.inner_blk_size, - it.outer_order); -} - -// Lexicographical compare of content -bool PartialBlkDesc::operator < (const PartialBlkDesc& it) const { - return std::tie(this->inner_blk_idxes, - this->inner_blk_size, - this->outer_order) < - std::tie(it.inner_blk_idxes, - it.inner_blk_size, - it.outer_order); -} - -std::string MKLDNNExtensionUtils::getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) { - std::string inArgs, outArgs; - if (parentDesc.getPrecision() != childDesc.getPrecision()) { - inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name()); - outArgs += (outArgs.empty() ? "" : "_") + std::string(childDesc.getPrecision().name()); - } - auto fmt_tag_src = MKLDNNMemoryDesc(parentDesc).getFormat(); - auto fmt_tag_dst = MKLDNNMemoryDesc(childDesc).getFormat(); - if (fmt_tag_src != fmt_tag_dst || one_of(mkldnn::memory::format_tag::undef, fmt_tag_src, fmt_tag_dst)) { - inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(fmt_tag_src); - outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(fmt_tag_dst); - } - return inArgs + "_" + outArgs; -} - -InferenceEngine::Precision MKLDNNExtensionUtils::getMaxPrecision(std::vector precisions) { - if (!precisions.empty()) { - std::sort(precisions.begin(), precisions.end(), - [](const InferenceEngine::Precision &lhs, const InferenceEngine::Precision &rhs) { - return lhs.size() > rhs.size(); - }); - return precisions[0]; - } - - return InferenceEngine::Precision::UNSPECIFIED; +std::vector MKLDNNExtensionUtils::convertToDnnlDims(const InferenceEngine::SizeVector& dims) { + return std::vector(dims.begin(), dims.end());; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h index 95e14a7afa2..8e7f9a1b374 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h @@ -11,77 +11,17 @@ #include #include "mkldnn.hpp" -#include "mkldnn_memory.h" +#include "cpu_memory_desc.h" namespace MKLDNNPlugin { - -/** - * Partial tensor descriptor - * - * Represent a classes of layout. As example Plain, TailC, CBlocked and other. - * - * The tensor are in one layout family if they have same PartialBlkDesc. - * - * Any tensor will have same PartialBlkDesc as it subview tensor. 
- * - * PartialBlkDesc plus Dims allow to reconstruct real tensorDesc (dense representation). - */ -class PartialBlkDesc { -public: - /** - * Check if this partial blocking desc will lead to additional zero padding - * for real tensor with provided dims - * - * Example: dims [2, 3, 8, 8] with blocking by 16 for second dim. Will lead - * to effective dims [2, 16, 8, 8] with zeroing all values - * [:, 3:16, :, :] - * - * @param dims to check on zero auto padding - * @return true if provided dims will use auto padding. Otherwise false. - */ - bool isAutoExtendedWith(const InferenceEngine::SizeVector &dims) const; - - /** - * Construct PartialBlkDesc from provided TensorDesc - * - * PartialBlkDesc has less expressiveness power so some information from TensorDesc will be dropped. - * The different TensorDesc object will has equal PartialBlkDesc. - * - * @param desc to extract PartialBlkDesc information about kind of layout - * @return PartialBlkDesc object corresponds layout described in desc - */ - static PartialBlkDesc extractFrom(const InferenceEngine::TensorDesc &desc); - - /** Construct plain PartialBlkDesc based on dims information */ - static PartialBlkDesc makePlain(const InferenceEngine::SizeVector &dims); - - /** Construct blocked Channel PartialBlkDesc based on dims information */ - static PartialBlkDesc makeCBlocked(const InferenceEngine::SizeVector &dims, size_t block_size); - - /** Construct per Channel PartialBlkDesc based on dims information */ - static PartialBlkDesc makeTailC(const InferenceEngine::SizeVector &dims); - - /** Compare operators. Allow to use it as key for std::map */ - bool operator == (const PartialBlkDesc& it) const; - bool operator < (const PartialBlkDesc& it) const; - -private: - PartialBlkDesc() = default; - InferenceEngine::SizeVector outer_order; - InferenceEngine::SizeVector inner_blk_size; - InferenceEngine::SizeVector inner_blk_idxes; -}; - class MKLDNNExtensionUtils { public: static uint8_t sizeOfDataType(mkldnn::memory::data_type dataType); - static mkldnn::memory::data_type IEPrecisionToDataType(InferenceEngine::Precision prec); + static mkldnn::memory::data_type IEPrecisionToDataType(const InferenceEngine::Precision& prec); static InferenceEngine::Precision DataTypeToIEPrecision(mkldnn::memory::data_type dataType); - static InferenceEngine::TensorDesc getUninitTensorDesc(const InferenceEngine::TensorDesc& desc); - static bool initTensorsAreEqual(const InferenceEngine::TensorDesc &desc1, const InferenceEngine::TensorDesc &desc2); - static std::string getReorderArgs(const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc); - static InferenceEngine::Precision getMaxPrecision(std::vector precisions); + static InferenceEngine::SizeVector convertToSizeVector(const mkldnn::memory::dims& dims); + static std::vector convertToDnnlDims(const InferenceEngine::SizeVector& dims); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 4e3fba2d2b1..e97912762e7 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -39,6 +39,7 @@ #include "utils/node_dumper.h" #include "utils/ngraph_utils.hpp" #include "utils/cpu_utils.hpp" +#include "cpu_memory_desc_utils.h" #include #include @@ -47,15 +48,6 @@ #include #include -/***************************************************** - * Debug capability - * - PRINT_GRAPH_INFO : Define it to enable printing - * additional 
information to std output. - * - * @todo Align with CPU_DEBUG_CAPS implementation - *****************************************************/ -// #define PRINT_GRAPH_INFO - using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -168,7 +160,7 @@ void MKLDNNGraph::Replicate(const std::shared_ptr &subgr auto parentNode = portInfo.first; auto port = portInfo.second; const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); - const MKLDNNNodePtr outNode = std::make_shared(parentNode->outDims[port].ToSizeVector(), + const MKLDNNNodePtr outNode = std::make_shared(parentNode->outputShapes[port], parentNode->getOriginalOutputPrecisionAtPort(port), nodeName, "Result", getEngine(), weightsCache); MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0)); @@ -269,7 +261,7 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana auto parentNode = op2node[unusedOutput.get_node_shared_ptr()]; const auto port = unusedOutput.get_index(); const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); - const MKLDNNNodePtr outNode = std::make_shared(parentNode->outDims[port].ToSizeVector(), + const MKLDNNNodePtr outNode = std::make_shared(parentNode->outputShapes[port], parentNode->getOriginalOutputPrecisionAtPort(port), nodeName, "Result", getEngine(), weightsCache); MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0)); @@ -306,15 +298,15 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana // Loading mean images for (const auto& input : inputsInfo) { - MKLDNNDims outDims; - if (!inputNodesMap[input.first]->getChildEdgeAt(0)->getDims().ndims()) { - outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1)); + Shape outShape; + if (!inputNodesMap[input.first]->outputShapes.front().getRank()) { + outShape = Shape(SizeVector({1, 1})); } else { - outDims = inputNodesMap[input.first]->getChildEdgeAt(0)->getDims(); + outShape = inputNodesMap[input.first]->outputShapes.front(); } InputInfo::Ptr ii = inputsInfo[input.first]; if (ii && ii->getPreProcess().getNumberOfChannels()) { - _normalizePreprocMap[input.first].Load(outDims, ii); + _normalizePreprocMap[input.first].Load(outShape, ii); } } } @@ -347,6 +339,8 @@ void MKLDNNGraph::InitGraph() { graphNode->cleanup(); } #endif + ExtractConstantNodes(); + ExecuteConstantNodesOnly(); } @@ -390,6 +384,16 @@ void MKLDNNGraph::InitOptimalPrimitiveDescriptors() { } } +void MKLDNNGraph::ExtractConstantNodes() { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExtractConstantNodes"); + for (auto& graphNode : graphNodes) { + if (graphNode->isConstant()) + constantGraphNodes.emplace_back(graphNode); + else + mutableGraphNodes.emplace_back(graphNode); + } +} + void MKLDNNGraph::ExecuteConstantNodesOnly() { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly"); mkldnn::stream stream(eng); @@ -418,10 +422,7 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() { return std::make_tuple(hasExternalInvalidEdges, hasLocalAllocatedEdges, outputs); }; - for (auto &graphNode : graphNodes) { - if (!graphNode->isConstant()) - continue; - + for (auto &graphNode : constantGraphNodes) { if (weightsCache) { auto sharedOutputs = acquireSharedOutputs(graphNode); @@ -437,9 +438,9 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() { } } -static bool isReorderAvailable(const TensorDesc& parentDesc, const TensorDesc& childDesc, 
const mkldnn::engine& eng) { - memory::desc dstMemDesc = MKLDNNMemoryDesc(childDesc); - memory::desc srcMemDesc = MKLDNNMemoryDesc(parentDesc); +static bool isReorderAvailable(const MemoryDesc& parentDesc, const MemoryDesc& childDesc, const mkldnn::engine& eng) { + memory::desc dstMemDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(childDesc); + memory::desc srcMemDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(parentDesc);; mkldnn::primitive_attr attr; dnnl_primitive_desc_t result = nullptr; @@ -471,14 +472,14 @@ void MKLDNNGraph::InitEdges() { if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() && !isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) { // If we are here, then we need to insert Convert, because there are no reorders that support such type conversion - const auto inDesc = edge->getInputDesc(); - const auto outDesc = edge->getOutputDesc(); + const auto& inDesc = edge->getInputDesc(); + const auto& outDesc = edge->getOutputDesc(); std::string convertName = edge->getParent()->getName() + "_" + inDesc.getPrecision().name() + "_" + outDesc.getPrecision().name(); - auto convertNode = std::make_shared(inDesc.getDims(), inDesc.getPrecision(), outDesc.getPrecision(), convertName, - this->getEngine(), this->weightsCache); + auto convertNode = std::make_shared(inDesc.getShape().getStaticDims(), inDesc.getPrecision(), outDesc.getPrecision(), + convertName, this->getEngine(), this->weightsCache); convertNode->setDescs(inDesc, outDesc); InsertNode(edge, convertNode, true); @@ -492,7 +493,7 @@ void MKLDNNGraph::InitEdges() { if (insertReorder) { std::string basicLayerName = edge->getParent()->getName() + "_" + - MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" + + MKLDNNReorderNode::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" + edge->getChild()->getName(); std::string layerName = basicLayerName; int idx = 0; @@ -601,22 +602,10 @@ void MKLDNNGraph::AllocateWithReuse() { int e_start = edge->getParent()->execIndex; int e_finish = edge->getChild()->execIndex; - const BlockingDesc block_desk = edge->getDesc().getBlockingDesc(); - - int64_t e_size = block_desk.getOffsetPadding() + 1; // size in bytes (from begin of data to last element) - for (int j = 0; j < block_desk.getBlockDims().size(); j++) - e_size += (block_desk.getBlockDims()[j] - 1) * block_desk.getStrides()[j]; - - // In some cases computational formula above doesn't work properly (e.g. for OhIw8o4i layout). - // This WA allows to limit the size of allocated memory from below. - // TODO: need to properly investigate the root cause of incorrect computations - int64_t min_size = 1; - for (int64_t dim : block_desk.getBlockDims()) { - min_size *= dim; + int64_t e_size = edge->getDesc().getCurrentSize(); // size in bytes (from the beginning of data to the last element) + if (e_size == MemoryDesc::UNDEFINED_SIZE) { + IE_THROW() << "Can not allocate memory since the size is undefined."; } - e_size = std::max(e_size, min_size); - - e_size *= edge->getDesc().getPrecision() == Precision::BIN ? 
1 : edge->getDesc().getPrecision().size(); box.start = std::min(e_start, box.start); box.finish = std::max(e_finish, box.finish); @@ -650,7 +639,7 @@ void MKLDNNGraph::AllocateWithReuse() { size_t total_size = static_cast(memSolver.solve()) * alignment; memWorkspace = std::make_shared(eng); - memWorkspace->Create(MKLDNNMemoryDesc(TensorDesc(Precision::I8, {total_size}, Layout::C))); + memWorkspace->Create(MKLDNNMemoryDesc({total_size}, mkldnn::memory::data_type::s8)); if (edge_clusters.empty()) return; @@ -710,13 +699,11 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: auto input = inputNodesMap.find(name); if (input != inputNodesMap.end()) { - MKLDNNDims outDims = input->second->getChildEdgeAt(0)->getDims(); - const void *ext_data_ptr = in->cbuffer(); void *inter_data_ptr = input->second->getChildEdgeAt(0)->getMemory().GetData(); if (ext_data_ptr != inter_data_ptr) { - auto ext_tdesc = MKLDNNMemoryDesc {in->getTensorDesc()}; + auto ext_tdesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(in->getTensorDesc()); auto ext_mem = MKLDNNMemory(eng); ext_mem.Create(ext_tdesc, ext_data_ptr, false); @@ -727,7 +714,8 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: // todo: make sure 'name' exists in this map... if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) { if (in->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) { - _normalizePreprocMap[name].NormalizeImage(outDims, reinterpret_cast(inter_data_ptr), + _normalizePreprocMap[name].NormalizeImage(input->second->getChildEdgeAt(0)->getShape(), + reinterpret_cast(inter_data_ptr), in->getTensorDesc().getLayout()); } else { IE_THROW() << "Mean image of type " << in->getTensorDesc().getPrecision().name() << " is unsupported"; @@ -775,7 +763,7 @@ void MKLDNNGraph::PullOutputData(const BlobMap &out) { MB_to_process = std::min(config.batchLimit, MB_to_process); size_t size_to_copy = intr_blob.GetElementsCount() * MB_to_process / MB; - const auto actualDesc = node->getParentEdgeAt(0)->getDesc(); + const auto actualDesc = MemoryDescUtils::convertToTensorDesc(node->getParentEdgeAt(0)->getDesc()); const auto expectedDesc = ext_blob->getTensorDesc(); // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it @@ -790,7 +778,7 @@ void MKLDNNGraph::PullOutputData(const BlobMap &out) { } if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) { - auto outBlobDesc = MKLDNNMemoryDesc{expectedDesc}; + auto outBlobDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(expectedDesc); auto outBloMem = MKLDNNMemory(eng); outBloMem.Create(outBlobDesc, ext_blob_ptr, false); @@ -810,24 +798,27 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) { ENABLE_CPU_DEBUG_CAP(NodeDumper nd(config.debugCaps, infer_count)); - for (int i = 0; i < graphNodes.size(); i++) { - if (request != nullptr) { +#ifdef CPU_DEBUG_CAPS + for (const auto& node : constantGraphNodes) { + if (request != nullptr) request->ThrowIfCanceled(); - } - PERF(graphNodes[i]); + ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(node)); + ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(node)); + } +#endif - if (batch > 0) - graphNodes[i]->setDynamicBatchLim(batch); + for (const auto& node : mutableGraphNodes) { + PERF(config.collectPerfCounters, node); + if (request != nullptr) + request->ThrowIfCanceled(); - ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(graphNodes[i])); + ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(node)); - if (!graphNodes[i]->isConstant()) 
{ - OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, graphNodes[i]->profiling.execute); - graphNodes[i]->execute(stream); - } + OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, node->profiling.execute); + node->execute(stream); - ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(graphNodes[i])); + ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(node)); } if (infer_count != -1) infer_count++; @@ -889,7 +880,7 @@ void MKLDNNGraph::SortTopologically() { // Make first N (N == port_num) edge indexes are matched with port index for (auto &node : graphNodes) { { - int port_num = node->inDims.size(); + int port_num = node->inputShapes.size(); std::vector res(port_num); for (int i = 0; i < node->parentEdges.size(); i++) { @@ -903,7 +894,7 @@ void MKLDNNGraph::SortTopologically() { node->parentEdges = {res.begin(), res.end()}; } { - int port_num = node->outDims.size(); + int port_num = node->outputShapes.size(); std::vector res(port_num); for (int i = 0; i < node->childEdges.size(); i++) { @@ -965,16 +956,20 @@ Config MKLDNNGraph::getProperty() const { return config; } -void MKLDNNGraph::getInputBlobs(InferenceEngine::BlobMap &resp) { - for (auto &it : inputNodesMap) { - resp[it.first] = it.second->getChildEdgeAt(0)->getBlob(); +Blob::Ptr MKLDNNGraph::getInputBlob(const std::string& name) { + auto itr = inputNodesMap.find(name); + if (itr != inputNodesMap.end()) { + return MemoryDescUtils::interpretAsBlob(itr->second->getChildEdgeAt(0)->getMemory()); } + return nullptr; } -void MKLDNNGraph::getOutputBlobs(InferenceEngine::BlobMap &resp) { - for (auto &it : outputNodesMap) { - resp[it.first] = it.second->getParentEdgeAt(0)->getBlob(); +Blob::Ptr MKLDNNGraph::getOutputBlob(const std::string& name) { + auto itr = outputNodesMap.find(name); + if (itr != outputNodesMap.end()) { + return MemoryDescUtils::interpretAsBlob(itr->second->getParentEdgeAt(0)->getMemory()); } + return nullptr; } void MKLDNNGraph::RemoveEdge(MKLDNNEdgePtr& edge) { @@ -1084,7 +1079,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentConv, inNum, outNum)); graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parentConv->inDims.push_back(newEdge->getDims()); + parentConv->inputShapes.push_back(Shape(newEdge->getShape())); } } @@ -1116,15 +1111,14 @@ void MKLDNNGraph::RemoveDroppedEdges() { } } -MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const TensorDesc& inDesc, const TensorDesc& outDesc, - bool isOptimized, InferenceEngine::Blob::Ptr scales) { +MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const MemoryDesc& inDesc, const MemoryDesc& outDesc, + bool isOptimized) { MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layerName, getEngine(), weightsCache)); auto *reorderPtr = dynamic_cast(newReorder.get()); if (reorderPtr == nullptr) { IE_THROW() << "MKLDNNGraph::InsertReorder: Cannot cast to MKLDNNReorderNode"; } reorderPtr->setDescs(inDesc, outDesc); - reorderPtr->_scales = scales; reorderPtr->setOptimized(isOptimized); InsertNode(edge, newReorder, true); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index 213fb3b0d54..50ccd0be4f0 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -44,8 +44,8 @@ public: void setProperty(const std::map &properties); Config getProperty() const; - void getInputBlobs(InferenceEngine::BlobMap &in_map); - void 
getOutputBlobs(InferenceEngine::BlobMap &out_map); + InferenceEngine::Blob::Ptr getInputBlob(const std::string& name); + InferenceEngine::Blob::Ptr getOutputBlob(const std::string& name); template void CreateGraph(NET &network, @@ -115,17 +115,17 @@ public: * @param layerName * Reorder layer name * @param inDesc - * input tensor descriptor + * input memory descriptor * @param outDesc - * output tensor descriptor + * output memory descriptor * @param isOptimized * optimization flag; if isOptimized is true then Reorder node does nothing * @param scales * pointer to the blob containing scales * @return pointer to the new Reorder node. */ - MKLDNNNodePtr InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const InferenceEngine::TensorDesc& inDesc, - const InferenceEngine::TensorDesc& outDesc, bool isOptimized = false, InferenceEngine::Blob::Ptr scales = nullptr); + MKLDNNNodePtr InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const MemoryDesc& inDesc, + const MemoryDesc& outDesc, bool isOptimized = false); /** * @brief Insert MKLDNNNode at the edge-specified location. @@ -218,6 +218,7 @@ protected: void Allocate(); void AllocateWithReuse(); void CreatePrimitives(); + void ExtractConstantNodes(); void ExecuteConstantNodesOnly(); friend class MKLDNNInferRequest; @@ -225,6 +226,11 @@ protected: friend InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph); private: + // these node pointers (from graphNodes) are to avoid regular checking for + // constant node in ExecuteConstantNodesOnly and Infer methods + std::vector constantGraphNodes; + std::vector mutableGraphNodes; + void EnforceBF16(); }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp index ac4bfff6b6d..909a5083f71 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp @@ -10,7 +10,6 @@ #include #include "ngraph/ngraph.hpp" #include "utils/debug_capabilities.h" - #include #include #include @@ -46,11 +45,11 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no std::string outputPrecisionsStr; if (!node->getChildEdges().empty()) { - outputPrecisionsStr = node->getChildEdgeAt(0)->getDesc().getPrecision().name(); + outputPrecisionsStr = node->getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().name(); bool isAllEqual = true; for (size_t i = 1; i < node->getChildEdges().size(); i++) { - if (node->getChildEdgeAt(i-1)->getDesc().getPrecision() != node->getChildEdgeAt(i)->getDesc().getPrecision()) { + if (node->getChildEdgeAt(i - 1)->getMemory().GetDesc().getPrecision() != node->getChildEdgeAt(i)->getMemory().GetDesc().getPrecision()) { isAllEqual = false; break; } @@ -59,12 +58,12 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no // If all output precisions are the same, we store the name only once if (!isAllEqual) { for (size_t i = 1; i < node->getChildEdges().size(); i++) - outputPrecisionsStr += "," + std::string(node->getChildEdgeAt(i)->getDesc().getPrecision().name()); + outputPrecisionsStr += "," + std::string(node->getChildEdgeAt(i)->getMemory().GetDesc().getPrecision().name()); } } else { // Branch to correctly handle output nodes if (!node->getParentEdges().empty()) { - outputPrecisionsStr = node->getParentEdgeAt(0)->getDesc().getPrecision().name(); + outputPrecisionsStr = node->getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name(); } } serialization_info[ExecGraphInfoSerialization::OUTPUT_PRECISIONS] = 
outputPrecisionsStr; @@ -73,12 +72,11 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no auto outDescs = node->getSelectedPrimitiveDescriptor()->getConfig().outConfs; if (!outDescs.empty()) { - auto fmt0 = MKLDNNMemoryDesc(outDescs[0].desc).getFormat(); - outputLayoutsStr = mkldnn::utils::fmt2str(fmt0); + outputLayoutsStr = outDescs[0].desc->serializeFormat(); bool isAllEqual = true; for (size_t i = 1; i < outDescs.size(); i++) { - if (MKLDNNMemoryDesc(outDescs[i - 1].desc).getFormat() != MKLDNNMemoryDesc(outDescs[i].desc).getFormat()) { + if (outDescs[i - 1].desc->serializeFormat() != outDescs[i].desc->serializeFormat()) { isAllEqual = false; break; } @@ -87,8 +85,7 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no // If all output layouts are the same, we store the name only once if (!isAllEqual) { for (size_t i = 1; i < outDescs.size(); i++) { - auto fmt = MKLDNNMemoryDesc(outDescs[i].desc).getFormat(); - outputLayoutsStr += "," + std::string(mkldnn::utils::fmt2str(fmt)); + outputLayoutsStr += "," + outDescs[i].desc->serializeFormat(); } } } else { @@ -163,10 +160,8 @@ InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph auto meta_data = extract_node_metadata(node); std::shared_ptr return_node; if (is_input) { - auto desc = node->getChildEdgeAt(0)->getDesc(); - auto param = std::make_shared( - details::convertPrecision(desc.getPrecision()), - ngraph::PartialShape(desc.getDims())); + auto& desc = node->getChildEdgeAt(0)->getMemory().GetDesc(); + auto param = std::make_shared(details::convertPrecision(desc.getPrecision()), desc.getShape().toPartialShape()); return_node = param; params.push_back(param); } else if (is_output) { @@ -177,10 +172,8 @@ InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph get_inputs(node), node->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size()); for (size_t port = 0; port < return_node->get_output_size(); ++port) { - auto desc = node->getChildEdgeAt(port)->getDesc(); - return_node->set_output_type(port, - details::convertPrecision(desc.getPrecision()), - ngraph::PartialShape(desc.getDims())); + auto& desc = node->getChildEdgeAt(port)->getMemory().GetDesc(); + return_node->set_output_type(port, details::convertPrecision(desc.getPrecision()), desc.getShape().toPartialShape()); } } @@ -237,18 +230,19 @@ void serializeToXML(const MKLDNNGraph &graph, const std::string& path) { void serializeToCout(const MKLDNNGraph &graph) { for (const auto& node : graph.GetNodes()) { std::cout << "name: " << node->getName() << " [ "; - if (!node->getParentEdges().empty()) { - const auto& parentEdge = *(node->getParentEdges()[0].lock()); - const auto& prnt_out_desc = parentEdge.getOutputDescRO(); - std::cout << "in: " << prnt_out_desc.getPrecision().name() - << "/l=" << prnt_out_desc.getLayout() - << "; "; - } - if (!node->getChildEdges().empty()) { - const auto& childEdge = *(node->getChildEdges()[0].lock()); - const auto& chld_in_desc = childEdge.getInputDescRO(); - std::cout << "out: " << chld_in_desc.getPrecision().name() - << "/l=" << chld_in_desc.getLayout(); + auto nodeDesc = node->getSelectedPrimitiveDescriptor(); + if (nodeDesc) { + auto& inConfs = nodeDesc->getConfig().inConfs; + if (!inConfs.empty()) { + std::cout << "in: " << inConfs.front().desc->getPrecision().name() + << "/l=" << inConfs.front().desc->serializeFormat() + << "; "; + } + auto& outConfs = nodeDesc->getConfig().outConfs; + if (!outConfs.empty()) { + std::cout << "out: " << outConfs.front().desc->getPrecision().name() + << 
"/l=" << outConfs.front().desc->serializeFormat(); + } } std::cout << " ]" << std::endl; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index 9811b683ad1..9cbc9b79aeb 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -43,6 +43,7 @@ #include #include "mkldnn_itt.h" +#include "cpu_memory_desc_utils.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -165,15 +166,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { if (biasNode->getType() != Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1) return false; - auto convOutDims = parentNode->getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(); - auto biasDims = getNormalizedDimsBySize(biasNode->getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(), + auto convOutDims = parentNode->getChildEdgesAtPort(0)[0]->getShape().getDims(); + auto biasDims = getNormalizedDimsBySize(biasNode->getChildEdgesAtPort(0)[0]->getShape().getDims(), convOutDims.size()); // TODO [NM]: Legacy ConvBias fusion transformation supports both per-tensor (via explicit broadcasing) and per-channel cases. // Most of the real models contain per-channel bias, so we need to reavaluate the need to support per-tensor variant. if (convOutDims.size() != biasDims.size() || biasDims.size() < 2) return false; - if (biasDims[0] != 1 || biasDims[1] != convOutDims[1]) + if (biasDims[0] != 1 || !dimsEqualStrong(biasDims[1], convOutDims[1])) return false; for (int i = 2; i < biasDims.size(); i++) { @@ -249,8 +250,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parent->outDims[inNum] = MKLDNNDims({parentEltwise->outDims[0][1]}); - parentEltwise->inDims.push_back(parent->outDims[0]); + parent->outputShapes[inNum] = Shape(SizeVector{parentEltwise->outputShapes[0].getStaticDims()[1]}); + parentEltwise->inputShapes.push_back(parent->outputShapes[0]); } } @@ -299,17 +300,17 @@ void MKLDNNGraphOptimizer::FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &grap void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableSecondInput = [](MKLDNNNodePtr node, MKLDNNDims dataDims) { + auto isSutableSecondInput = [](MKLDNNNodePtr node, SizeVector dataDims) { if (node->getType() != Input || !node->isConstant()) return false; - auto secondInputDims = node->outDims[0]; - if (secondInputDims.ndims() != dataDims.ndims() || secondInputDims.ndims() < 2) + auto secondInputDims = node->outputShapes[0].getDims(); + if (secondInputDims.size() != dataDims.size() || secondInputDims.size() < 2) return false; - if (secondInputDims[0] != 1 || secondInputDims[1] != dataDims[1]) + if (secondInputDims[0] != 1 || !dimsEqualStrong(secondInputDims[1], dataDims[1])) return false; - for (size_t i = 2; i < secondInputDims.ndims(); i++) { + for (size_t i = 2; i < secondInputDims.size(); i++) { if (secondInputDims[i] != 1) return false; } @@ -322,14 +323,14 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { node->getParentEdges().size() != 2 || node->getChildEdges().size() != 1) return false; - return isSutableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), node->getParentEdgesAtPort(0)[0]->getDims()); + return isSutableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), 
node->getParentEdgesAtPort(0)[0]->getShape().getDims()); }; auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2) return false; - return isSutableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getParentEdgesAtPort(0)[0]->getDims()); + return isSutableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getParentEdgesAtPort(0)[0]->getShape().getDims()); }; auto parent = graphNodes.begin(); @@ -397,7 +398,7 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parentEltwise->inDims.push_back(parent->outDims[0]); + parentEltwise->inputShapes.push_back(parent->outputShapes[0]); } } @@ -416,9 +417,9 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { bool retVal = false; if (node->getType() == Convolution) { if (auto convNode = std::dynamic_pointer_cast(node)) { - auto ndims = convNode->getParentEdgeAt(0)->getDims().ndims(); + auto rank = convNode->getParentEdgeAt(0)->getShape().getRank(); // int8 depthwise convolution does not support fusing zero points in 3D case - if (implication(convNode->isDepthWise(), ndims == 4)) { + if (implication(convNode->isDepthWise(), rank == 4)) { retVal = true; } } @@ -431,8 +432,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (convNode == nullptr) IE_THROW() << "Cannot get convolution node " << node->getName(); - int IC = node->getParentEdgesAtPort(0)[0]->getDims()[1]; - int OC = node->getChildEdgesAtPort(0)[0]->getDims()[1]; + int IC = node->getParentEdgesAtPort(0)[0]->getShape().getDims()[1]; + int OC = node->getChildEdgesAtPort(0)[0]->getShape().getDims()[1]; + + if (Shape::UNDEFINED_DIM == IC || Shape::UNDEFINED_DIM == OC) { + return false; + } if (parent0->getType() == Eltwise) { if (!parent0->getFusedWith().empty() || !parent1->getFusedWith().empty()) @@ -456,15 +461,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (arg0->getOriginalOutputPrecisionAtPort(0) != Precision::U8) return false; - if (parent0->getParentEdgesAtPort(1)[0]->getDims().size() < 2) { + if (parent0->getParentEdgesAtPort(1)[0]->getShape().getRank() < 2) { return false; } - auto zpDims = parent0->getParentEdgesAtPort(1)[0]->getDims(); - if (zpDims[0] != 1 || zpDims[1] != IC) + auto zpDims = parent0->getParentEdgesAtPort(1)[0]->getShape().getDims(); + if (zpDims[0] != 1 || !dimsEqualStrong(zpDims[1], IC)) return false; - for (int i = 2; i < zpDims.ndims(); i++) { + for (int i = 2; i < zpDims.size(); i++) { if (zpDims[i] != 1) return false; } @@ -485,7 +490,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (zeroPointsData == nullptr) IE_THROW() << "zeroPointsBlob has not allocated buffer"; - for (int j = 0; j < parent0->getParentEdgesAtPort(1)[0]->getDims()[1]; j++) { + auto zeroPointDataSize = parent0->getParentEdgesAtPort(1)[0]->getShape().getDims()[1]; + if (Shape::UNDEFINED_DIM == zeroPointDataSize) { + return false; + } + + for (int j = 0; j < zeroPointDataSize; j++) { convNode->inputZeroPoints.push_back(zeroPointsData[j]); } } else { @@ -524,11 +534,13 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { ptrdiff_t G = convNode->getGroupNum(); const int groupOffset = convNode->getAlgorithm() == ConvolutionGrouped ? 
1 : 0; - ptrdiff_t OC = weightsConstant->outDims[0][0 + groupOffset]; - ptrdiff_t IC = weightsConstant->outDims[0][1 + groupOffset]; - ptrdiff_t KD = weightsConstant->outDims[0].ndims() == (5 + groupOffset) ? weightsConstant->outDims[0][weightsConstant->outDims[0].ndims() - 3] : 1; - ptrdiff_t KH = weightsConstant->outDims[0][weightsConstant->outDims[0].ndims() - 2]; - ptrdiff_t KW = weightsConstant->outDims[0][weightsConstant->outDims[0].ndims() - 1]; + auto& weightsConstantDims = weightsConstant->outputShapes[0].getStaticDims(); + + ptrdiff_t OC = weightsConstantDims[0 + groupOffset]; + ptrdiff_t IC = weightsConstantDims[1 + groupOffset]; + ptrdiff_t KD = weightsConstantDims.size() == (5 + groupOffset) ? weightsConstantDims[weightsConstantDims.size() - 3] : 1; + ptrdiff_t KH = weightsConstantDims[weightsConstantDims.size() - 2]; + ptrdiff_t KW = weightsConstantDims[weightsConstantDims.size() - 1]; for (size_t g = 0; g < G; g++) { for (size_t oc = 0; oc < OC; oc++) { @@ -588,7 +600,7 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra auto& graphNodes = graph.GetNodes(); auto isSutableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getParentEdgeAt(0)->getDims().ndims() != 3; + return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getParentEdgeAt(0)->getShape().getRank() != 3; }; auto parent = graphNodes.begin(); @@ -653,12 +665,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { const auto &strides = conv->getStride(); const auto &paddings = conv->getPaddingL(); - const auto &inDims = node->getParentEdgeAt(0)->getDims(); - const auto &outDims = node->getChildEdgeAt(0)->getDims(); + const auto &inDims = node->getParentEdgeAt(0)->getShape().getDims(); + const auto &outDims = node->getChildEdgeAt(0)->getShape().getDims(); bool isSupportedParams = conv->getGroupNum() == 1 && - inDims.ndims() == 4 && - inDims[inDims.ndims() - 1] == outDims[outDims.ndims() - 1] && - inDims[inDims.ndims() - 2] == outDims[outDims.ndims() - 2] && + inDims.size() == 4 && + dimsEqualStrong(inDims[inDims.size() - 1], outDims[outDims.size() - 1]) && + dimsEqualStrong(inDims[inDims.size() - 2], outDims[outDims.size() - 2]) && is1x1Convolution(conv) && // TODO [oneDNN] : fusing is permitted only with 1x1 convolutions everyone_is(1, strides[strides.size() - 1], strides[strides.size() - 2]) && everyone_is(0, paddings[paddings.size() - 1], paddings[paddings.size() - 2]) && @@ -702,8 +714,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { const auto weightRank = convChild->getWeightDims().size(); const auto stridesSize = convChild->getStride().size(); - bool isSupportedParams = convChild->outDims[0][1] == convChild->getGroupNum() && - convChild->outDims[0][1] != 1 && + bool isSupportedParams = dimsEqualStrong(convChild->outputShapes[0].getDims()[1], convChild->getGroupNum()) && + convChild->outputShapes[0].getDims()[1] != 1 && everyone_is(3, convChild->getWeightDims()[weightRank - 1], convChild->getWeightDims()[weightRank - 2]) && everyone_is(1, convChild->getPaddingL()[stridesSize - 1], convChild->getPaddingL()[stridesSize - 2]) && everyone_is(1, convChild->getPaddingR()[stridesSize - 1], convChild->getPaddingR()[stridesSize - 2]) && @@ -711,14 +723,18 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { convChild->getStride()[stridesSize - 1] == convChild->getStride()[stridesSize - 
2] && withBias && one_of(convChild->getStride()[stridesSize - 1], 1, 2) && - childNode->getChildEdgeAt(0)->getDims().ndims() == 4; + childNode->getChildEdgeAt(0)->getShape().getRank() == 4; return isSupportedParams; }; auto isFusingWorthwhile = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) { - auto inDims = childNode->inDims[0]; - auto outDims = childNode->outDims[0]; + if (!childNode->inputShapes[0].isStatic() || !childNode->outputShapes[0].isStatic()) { + return false; + } + + auto inDims = childNode->inputShapes[0].getStaticDims(); + auto outDims = childNode->outputShapes[0].getStaticDims(); int elemSize = childNode->getOriginalOutputPrecisionAtPort(0).size(); int L3_cache_size = utils::get_cache_size(3, false); @@ -1076,9 +1092,9 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG if (mergedConv->fusedWith.size() > 0 && (mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) { // Merged with DW_conv. Shape may change - mergedConv->inDims.push_back(mergedConv->fusedWith[0]->outDims[0]); + mergedConv->inputShapes.push_back(mergedConv->fusedWith[0]->outputShapes[0]); } else { - mergedConv->inDims.push_back(mergedConv->outDims[0]); + mergedConv->inputShapes.push_back(mergedConv->outputShapes[0]); } size_t childIdx = 0lu; @@ -1352,7 +1368,7 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parent->outDims[inNum] = child->inDims[outNum]; + parent->outputShapes[inNum] = child->inputShapes[outNum]; } } else { MKLDNNEdgePtr &remEdge = p_edge; @@ -1373,7 +1389,7 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parentNode->inDims.push_back(parent->outDims[0]); + parentNode->inputShapes.push_back(parent->outputShapes[0]); } } @@ -1400,16 +1416,6 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { if (nn == nullptr) IE_THROW() << "Cannot get reorder layer " << nextNode->getName(); - auto scales = n->_scales; - - if (n->_scales != nullptr && nn->_scales != nullptr) { - IE_THROW() << "Merging scales of two subsequent reorders is unsupported yet"; - } else { - if (scales == nullptr) { - scales = nn->_scales; - } - } - MKLDNNNodePtr p = n->getParentEdgeAt(0)->getParent(); MKLDNNNodePtr c = nn->getChildEdgeAt(0)->getChild(); @@ -1430,7 +1436,7 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName(); - graph.InsertReorder(edge, layerName, n->getInput(), nn->getOutput(), false, scales); + graph.InsertReorder(edge, layerName, n->getInput(), nn->getOutput(), false); graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end()); } } @@ -1448,8 +1454,8 @@ void MKLDNNGraphOptimizer::FuseBroadcastAndEltwise(MKLDNNGraph &graph) { MKLDNNNodePtr& broadcastNode = graphNode; MKLDNNNodePtr eltwiseNode = broadcastNode->getChildEdgeAt(0)->getChild(); - eltwiseNode->inDims[broadcastNode->getChildEdgeAt(0)->getOutputNum()] - = broadcastNode->getParentEdgeAt(0)->getDims(); + eltwiseNode->inputShapes[broadcastNode->getChildEdgeAt(0)->getOutputNum()] + = broadcastNode->getParentEdgeAt(0)->getShape(); auto& edges = graph.GetEdges(); for (size_t i = 1lu; i < broadcastNode->getParentEdges().size(); i++) { @@ -1673,9 +1679,14 @@ void 
MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { } auto& transposeOrder = transposeNode->getOrder(); - auto& layoutOrder = transposeNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); - auto& inOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getBlockingDesc().getOrder(); - auto& outOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); + auto layoutOrder = MemoryDescUtils::convertToBlockedDescriptor( + *transposeNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc).getOrder(); + + auto inBlockedDesc = MemoryDescUtils::convertToBlockedDescriptor(*reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc); + auto outBlockedDesc = MemoryDescUtils::convertToBlockedDescriptor(*reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc); + + auto& inOrder = inBlockedDesc.getOrder(); + auto& outOrder = outBlockedDesc.getOrder(); if (transposeOrder.size() != layoutOrder.size() || layoutOrder.size() != inOrder.size() || inOrder.size() != outOrder.size()) { return false; @@ -1751,18 +1762,18 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { graph.DropNode(parentNode); graph.DropNode(childNode); - auto inDesc = parentNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc; - auto outDesc = childNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc; + auto& inDesc = parentNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc; + auto& outDesc = childNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc; - auto inPrec = inDesc.getPrecision(); - auto outPrec = outDesc.getPrecision(); + auto inPrec = inDesc->getPrecision(); + auto outPrec = outDesc->getPrecision(); - auto reorderInDesc = TensorDesc(inDesc); - auto reorderOutDesc = TensorDesc(outDesc); - reorderOutDesc.setPrecision(inPrec); + auto reorderInDesc = inDesc->clone(); + auto reorderOutDesc = outDesc->clone(); + reorderOutDesc->setPrecision(inPrec); std::string reorderlayerName = parentParentNode->getName() + "_" + - MKLDNNExtensionUtils::getReorderArgs(reorderInDesc, reorderOutDesc) + "_" + "fake"; + MKLDNNReorderNode::getReorderArgs(*reorderInDesc, *reorderOutDesc) + "_" + "fake"; MKLDNNEdgePtr edge; for (auto &childEdge : parentParentNode->getChildEdges()) { @@ -1775,17 +1786,17 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { IE_THROW() << "Transpose node '" << parentNode->getName() << "' has invalid edges."; } - auto reorderNode = graph.InsertReorder(edge, reorderlayerName, reorderInDesc, reorderOutDesc, true); + auto reorderNode = graph.InsertReorder(edge, reorderlayerName, *reorderInDesc, *reorderOutDesc, true); // case 2 if (inPrec != outPrec) { - auto reorderInDesc2 = TensorDesc(reorderOutDesc); - auto reorderOutDesc2 = TensorDesc(outDesc); + auto reorderInDesc2 = reorderOutDesc->clone(); + auto reorderOutDesc2 = outDesc->clone(); std::string reorderLayerName2 = reorderNode->getName() + "_" + - MKLDNNExtensionUtils::getReorderArgs(reorderInDesc2, reorderOutDesc2) + "_" + childChildNode->getName(); + MKLDNNReorderNode::getReorderArgs(*reorderInDesc2, *reorderOutDesc2) + "_" + childChildNode->getName(); - graph.InsertReorder(reorderNode->getChildEdgeAt(0), reorderLayerName2, reorderInDesc2, reorderOutDesc2, false); + graph.InsertReorder(reorderNode->getChildEdgeAt(0), reorderLayerName2, *reorderInDesc2, *reorderOutDesc2, 
false); } }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp index 738604a6f0a..77dbe3e1215 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp @@ -213,8 +213,6 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: InferenceEngine::Blob::Ptr data; if (graph->hasInputWithName(name)) { - InferenceEngine::BlobMap blobs; - graph->getInputBlobs(blobs); // ROI blob is returned only if it was set previously. auto it = _preProcData.find(name); if (it != _preProcData.end()) { @@ -223,7 +221,12 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: } if (_inputs.find(name) == _inputs.end()) { - InferenceEngine::TensorDesc desc = blobs[name]->getTensorDesc(); + auto pBlob = graph->getInputBlob(name); + if (!pBlob) { + IE_THROW() << "MKLDNN graph doesn't contain input node with name: " << name; + } + + InferenceEngine::TensorDesc desc = pBlob->getTensorDesc(); if (_networkInputs.find(name) != _networkInputs.end()) { InferenceEngine::Layout l = _networkInputs[name]->getLayout(); @@ -235,7 +238,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: _inputs[name] = make_blob_with_precision(desc); _inputs[name]->allocate(); - if (blobs[name]->getTensorDesc() == desc && + if (pBlob->getTensorDesc() == desc && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) { externalPtr[name] = _inputs[name]->buffer(); } @@ -258,9 +261,12 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: } if (graph->hasOutputWithName(name)) { - InferenceEngine::BlobMap blobs; - graph->getOutputBlobs(blobs); if (_outputs.find(name) == _outputs.end()) { + auto pBlob = graph->getOutputBlob(name); + if (!pBlob) { + IE_THROW() << "MKLDNN graph doesn't contain output node with name: " << name; + } + if (!data) { InferenceEngine::TensorDesc desc = _networkOutputs[name]->getTensorDesc(); desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision())); @@ -275,7 +281,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: data = make_blob_with_precision(desc); data->allocate(); } else { - const auto& expectedTensorDesc = blobs[name]->getTensorDesc(); + const auto& expectedTensorDesc = pBlob->getTensorDesc(); if (expectedTensorDesc.getPrecision() != data->getTensorDesc().getPrecision()) { IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name << " but expect blobs with different precision: " @@ -295,7 +301,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: } _outputs[name] = data; - if (!externalPtr.count(name) && data->getTensorDesc() == blobs[name]->getTensorDesc() && !graph->getProperty().batchLimit) { + if (!externalPtr.count(name) && data->getTensorDesc() == pBlob->getTensorDesc() && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } } @@ -366,12 +372,12 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In IE_THROW(ParameterMismatch) << "Failed to set input blob. 
Blocking descriptor mismatch."; } - InferenceEngine::BlobMap blobs; - graph->getInputBlobs(blobs); - if (blobs.find(name) == blobs.end()) + auto pBlob = graph->getInputBlob(name); + if (!pBlob) { IE_THROW() << "MKLDNN graph doesn't contain input node with name: " << name; + } - if (data->getTensorDesc() == blobs.at(name)->getTensorDesc() && + if (data->getTensorDesc() == pBlob->getTensorDesc() && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } else if (externalPtr.find(name) != externalPtr.end()) { @@ -404,12 +410,11 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In IE_THROW(ParameterMismatch) << "Failed to set output blob. Blocking descriptor mismatch."; } - InferenceEngine::BlobMap blobs; - graph->getOutputBlobs(blobs); - if (blobs.find(name) == blobs.end()) + auto pBlob = graph->getOutputBlob(name); + if (!pBlob) IE_THROW() << "MKLDNN graph doesn't contain output node with name: " << name; - if (data->getTensorDesc() == blobs.at(name)->getTensorDesc() && + if (data->getTensorDesc() == pBlob->getTensorDesc() && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } else if (externalPtr.find(name) != externalPtr.end()) { @@ -435,6 +440,7 @@ void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() { auto& child = input->second->getChildEdgeAt(i)->getChild(); if (child->isConstant()) canBeInPlace = false; + auto* concat = dynamic_cast(child.get()); if (canBeInPlace && concat && concat->isOptimized()) canBeInPlace = false; @@ -506,6 +512,10 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBatch(int new_batch) { } m_curBatch = new_batch; + + for (const auto& node : graph->GetNodes()) { + node->setDynamicBatchLim(new_batch); + } } std::vector MKLDNNPlugin::MKLDNNInferRequest::QueryState() { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp index 6d82ccf3e22..a6a64120f00 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp @@ -2,23 +2,24 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include -#include #include #include #include -#include #include "utils/general_utils.h" #include #include +#include #include "mkldnn_memory.h" #include "mkldnn_extension_utils.h" #include "nodes/common/cpu_memcpy.h" #include "nodes/common/cpu_convert.h" #include "mkldnn/ie_mkldnn.h" +#include "cpu_shape.h" +#include "cpu_memory_desc_utils.h" +#include "mkldnn_extension_utils.h" using namespace InferenceEngine; using namespace mkldnn; @@ -54,7 +55,7 @@ void MKLDNNMemory::Create(const memory::dims& dims, memory::data_type data_type, format = memory::format_tag::any; } - memory::desc desc = MKLDNNMemoryDesc({dims}, data_type, format); + memory::desc desc = MKLDNNMemoryDesc(MKLDNNExtensionUtils::convertToSizeVector(dims), data_type, format); Create(desc, data); } @@ -89,10 +90,16 @@ void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data, bo } } +void MKLDNNMemory::Create(const MemoryDesc &desc, const void *data, bool pads_zeroing) { + pMemDesc = desc.clone(); + Create(mkldnn::memory::desc(MemoryDescUtils::convertToMKLDNNMemoryDesc(desc)), data, pads_zeroing); +} + + void MKLDNNMemory::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output, size_t size) { if (size != 0) IE_ASSERT(size <= output.GetDescriptor().get_size()); - if (input.GetDesc() == output.GetDesc()) { + if 
(input.GetDescriptor() == output.GetDescriptor()) { auto srcPtr = static_cast(input.GetPtr()); auto dstPtr = static_cast(output.GetPtr()); @@ -118,7 +125,7 @@ void MKLDNNMemory::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &ou MKLDNNExtensionUtils::DataTypeToIEPrecision(output.GetDataType()), input.GetElementsCount()); MKLDNNMemory tmpMem(output.eng); - tmpMem.Create(input.GetDims(), output.GetDataType(), input.GetDesc().getFormat(), tmpBuff.data()); + tmpMem.Create(input.GetDims(), output.GetDataType(), input.GetMKLDNNDesc().getFormat(), tmpBuff.data()); pReorder = std::unique_ptr(new mkldnn::reorder(tmpMem.GetPrimitive(), output.GetPrimitive())); srcMemoryPtr = tmpMem.prim; @@ -189,8 +196,8 @@ void MKLDNNMemory::FillZero() { memset(dataPtr, 0, GetSize()); } -memory::format_tag MKLDNNMemory::GetPlainFormat(const memory::dims& dims) { - switch (dims.size()) { +memory::format_tag MKLDNNMemory::GetPlainFormatByRank(size_t rank) { + switch (rank) { case 0: case 1: return memory::format_tag::a; @@ -222,11 +229,6 @@ InferenceEngine::Layout MKLDNNMemory::GetPlainLayout(const memory::dims& dims) { } } -bool MKLDNNMemory::isConsistant(const mkldnn::memory::dims& dims, mkldnn::memory::format_tag format) { - memory::desc attempt(dims, memory::data_type::f32, format, true); - return static_cast(attempt); -} - Precision MKLDNNMemory::convertToIePrec(memory::data_type dataType) { return MKLDNNExtensionUtils::DataTypeToIEPrecision(dataType); } @@ -262,6 +264,42 @@ std::string MKLDNNMemory::formatToString(memory::format_tag fmt) { return mkldnn::utils::fmt2str(fmt); } +void *MKLDNNMemory::GetPtr() const { + auto ptr = static_cast(GetData()); + auto md = GetDescriptor().data; + mkldnn::impl::memory_desc_wrapper wrapper(md); + ptr += wrapper.offset0() * wrapper.data_type_size(); + return ptr; +} + +template<> +MKLDNNMemoryDesc MKLDNNMemory::GetDescWithType() const { + if (auto descPtr = dynamic_cast(pMemDesc.get())) { + return *descPtr; + } else { + switch (pMemDesc->getType()) { + case (MemoryDescType::Blocked): + return MemoryDescUtils::convertToMKLDNNMemoryDesc(*(pMemDesc->as())); + default: + IE_THROW() << "Can not convert unsupported memory descriptor"; + } + } +} + +template<> +BlockedMemoryDesc MKLDNNMemory::GetDescWithType() const { + if (auto descPtr = dynamic_cast(pMemDesc.get())) { + return *descPtr; + } else { + switch (pMemDesc->getType()) { + case (MemoryDescType::Mkldnn): + return MemoryDescUtils::convertToBlockedDescriptor(*(pMemDesc->as())); + default: + IE_THROW() << "Can not convert unsupported memory descriptor"; + } + } +} + bool MKLDNNMemoryDesc::operator==(const MKLDNNMemoryDesc &rhs) const { return this->desc == rhs.desc; } @@ -274,51 +312,42 @@ MKLDNNMemoryDesc::operator mkldnn::memory::desc() const { return desc; } -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const mkldnn::memory::dims& dims, mkldnn::memory::data_type dataType, - mkldnn::memory::format_tag format): desc(dims, dataType, mkldnn::memory::format_tag::any) { +MKLDNNMemoryDesc::MKLDNNMemoryDesc(const mkldnn::memory::desc& desc) : + MemoryDesc(Shape(MKLDNNExtensionUtils::convertToSizeVector(desc.dims())), Mkldnn), desc(desc) { + if (desc.data.format_kind == dnnl::impl::format_kind::any) + IE_THROW(Unexpected) << "Memory format any is prohibited!"; +} + +MKLDNNMemoryDesc::MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format) + : MemoryDesc(Shape(_dims), Mkldnn) { + if (format == memory::format_tag::any) + IE_THROW(Unexpected) << "Memory format any is 
prohibited!"; if (format != memory::format_tag::undef) { - if (format == memory::format_tag::x && dims.size() == 0) { + if (format == memory::format_tag::x && _dims.size() == 0) { desc = mkldnn::memory::desc(mkldnn::memory::dims(1, 1), dataType, format); } else { - desc = mkldnn::memory::desc(dims, dataType, format); + desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, format); } } else { // Trying to create plain descriptor // This WA is needed since memory::format_tag doesn't contain plain tag for tensors with rank > 6D - mkldnn::memory::dims strides(dims.size(), 1); - for (int d = dims.size() - 2; d >= 0; d--) { - strides[d] = strides[d + 1] * dims[d + 1]; + mkldnn::memory::dims strides(_dims.size(), 1); + for (int d = _dims.size() - 2; d >= 0; d--) { + strides[d] = strides[d + 1] * _dims[d + 1]; } - desc = mkldnn::memory::desc(dims, dataType, strides); + desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, strides); } } -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const mkldnn::memory::dims& dims, mkldnn::memory::data_type dataType) : desc() { - const auto ndims = dims.size(); +MKLDNNMemoryDesc::MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType) + : MemoryDesc(Shape(_dims), Mkldnn), desc() { + const auto ndims = _dims.size(); mkldnn::memory::dims plain_strides(ndims, 1); for (size_t i = 1; i < ndims; i++) { - plain_strides[ndims - i -1] = plain_strides[ndims - i] * dims[ndims - i]; - } - desc = {dims, dataType, plain_strides}; -} - -size_t MKLDNNMemoryDesc::GetElementSize() const { - const auto type = desc.data_type(); - switch (type) { - case memory::data_type::f16 : - case memory::data_type::bf16 : - return 2; - case memory::data_type::f32 : - case memory::data_type::s32 : - return 4; - case memory::data_type::s8 : - case memory::data_type::u8 : - case memory::data_type::bin : - return 1; - default: - IE_THROW() << "Unknown data type"; + plain_strides[ndims - i -1] = plain_strides[ndims - i] * _dims[ndims - i]; } + desc = {MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, plain_strides}; } static const std::map> form_tags_by_ndims { @@ -677,32 +706,92 @@ bool MKLDNNMemoryDesc::isTailCFormat() const { return is_tailc_strides; } +bool MKLDNNMemoryDesc::blocksExtended() const { + for (int i = 0; i < desc.data.ndims; i++) { + if (desc.data.dims[i] != desc.data.padded_dims[i]) + return true; + } + return false; +} + +size_t MKLDNNMemoryDesc::getMemSizeImp() const { + return desc.get_size(); +} + +size_t MKLDNNMemoryDesc::getElementOffset(size_t elemNumber) const { + mkldnn::impl::memory_desc_wrapper wrapped(desc.data); + return wrapped.off_l(elemNumber); +} + +bool MKLDNNMemoryDesc::isCompatible(const MemoryDesc &rhs) const { + if (MemoryDescType::Blocked == rhs.getType()) { + return isCompatible(*(rhs.as())); + } else if (MemoryDescType::Mkldnn == rhs.getType()) { + return isCompatible(*(rhs.as())); + } else { + return false; + } +} + +bool MKLDNNMemoryDesc::isCompatible(const MKLDNNMemoryDesc &rhs) const { + using namespace dnnl; + using namespace impl; + using namespace dnnl::impl::utils; + if (this->desc == rhs.desc) { + return true; + } + mkldnn::impl::memory_desc_wrapper wrappedThis(this->desc.data); + mkldnn::impl::memory_desc_wrapper wrappedRhs(rhs.desc.data); + if (one_of(wrappedThis.format_kind(), format_kind::undef, format_kind::any)) + return false; + if (wrappedThis.is_wino_desc() || wrappedThis.is_rnn_packed_desc()) return false; + + const auto &blk = wrappedThis.blocking_desc(); + 
const auto &r_blk = wrappedRhs.blocking_desc(); + + int stride_start = wrappedThis.ndims() >0 && wrappedThis.dims()[0] == 1 ? 1 : 0; //ignore batch axis stride if batch size == 1 + + // Here is a slightly modified version of mkldnn::impl::memory_desc_wrapper::similar_to() call able to skip specific strides check. + return wrappedThis.ndims() == wrappedRhs.ndims() + && wrappedThis.format_kind() == wrappedRhs.format_kind() + && wrappedThis.data_type() == wrappedRhs.data_type() + && array_cmp(wrappedThis.dims(), wrappedRhs.dims(), wrappedThis.ndims()) + && array_cmp(blk.strides + stride_start, r_blk.strides + stride_start, wrappedThis.ndims() - stride_start) + && blk.inner_nblks == r_blk.inner_nblks + && array_cmp(blk.inner_blks, r_blk.inner_blks, blk.inner_nblks) + && array_cmp(blk.inner_idxs, r_blk.inner_idxs, blk.inner_nblks) + && array_cmp(wrappedThis.padded_dims(), wrappedRhs.padded_dims(), wrappedRhs.ndims()) + && array_cmp(wrappedThis.padded_offsets(), wrappedRhs.padded_offsets(), wrappedThis.ndims()) + && dimsEqualWeak(wrappedThis.offset0(), wrappedRhs.offset0()); +} + + /** - * Convert to IE::TensorDesc + * Check compatibility with BlockedMemoryDesc * * mkl: IOhw_4i16o4i dims {32, 64, 128, 128} * strides // the order of outer dims is encoded here * inner_blks 4 16 4 * inner_idxs 1 0 1 * - * IE tensor desc has more expressive ability. Any oneDNN blocked tensor can be covreted. - * How to convert into IE representation: + * BlockedMemoryDesc desc has more expressive ability. + * How to check compatibility with BlockedMemoryDesc representation: * 0. Detect a new_outer_order of outer_dims via descending strides. - * 1. IE strides : concatenate strides in new_outer_order and inner strides. - * 2. IE dims : concatenate outer dims in new_outer_order with auto padding and inner blocks - * 3. IE order : concatenate new_outer_order and inner_idxs + * 1. BlockedMemoryDesc strides : concatenate strides in new_outer_order and inner strides. + * 2. BlockedMemoryDesc dims : concatenate outer dims in new_outer_order with auto padding and inner blocks + * 3. 
BlockedMemoryDesc order : concatenate new_outer_order and inner_idxs */ -MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { + +bool MKLDNNMemoryDesc::isCompatible(const BlockedMemoryDesc &rhs) const { + if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) { + return false; + } + const auto dims = desc.dims(); - if (desc.data.format_kind == dnnl_format_kind_any) - return TensorDesc { - MKLDNNMemory::convertToIePrec(desc.data_type()), - SizeVector {begin(dims), end(dims)}, - Layout::ANY}; - - if (desc.data.format_kind != dnnl_blocked) - IE_THROW() << "Conversion is not possible"; + if (desc.data.format_kind != dnnl_blocked) { + return false; + } const auto &blk_desc = desc.data.format_desc.blocking; @@ -731,174 +820,99 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { std::iota(outer_order.begin(), outer_order.end(), 0); std::sort(outer_order.begin(), outer_order.end(), [&blk_desc, &outer_block_dims] (size_t ind_l, size_t ind_r) { - return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || - (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); - }); + return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || + (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); - // IE blocked order + // blocked order // [new_outer_order] U [inner_idxs] - SizeVector ie_blk_order(total_ndims, 0); - std::copy(outer_order.begin(), outer_order.end(), ie_blk_order.begin()); - std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, ie_blk_order.begin() + dims.size()); + SizeVector blk_order(total_ndims, 0); + std::copy(outer_order.begin(), outer_order.end(), blk_order.begin()); + std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, blk_order.begin() + dims.size()); - // IE blocked strides - // [outer_strides via new_outer_order] U [inner_strides] - SizeVector ie_blk_strides(total_ndims, 0); - std::copy(inner_strides.rbegin(), inner_strides.rend(), ie_blk_strides.rbegin()); - std::transform(outer_order.begin(), outer_order.end(), ie_blk_strides.begin(), - [&] (size_t i) { return blk_desc.strides[i]; }); + if (!dimsEqualWeak(blk_order, rhs.getOrder())) { + return false; + } - // IE blocked dims + //TODO [DS]: undefined offset is also used now as an indicator of undefined strides + if (desc.data.offset0 != Shape::UNDEFINED_DIM) { + // blocked strides + // [outer_strides via new_outer_order] U [inner_strides] + SizeVector blk_strides(total_ndims, 0); + std::copy(inner_strides.rbegin(), inner_strides.rend(), blk_strides.rbegin()); + std::transform(outer_order.begin(), outer_order.end(), blk_strides.begin(), + [&](size_t i) { return blk_desc.strides[i]; }); + + size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 
0 : + Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 + if (!dimsEqualWeak(blk_strides, rhs.getStrides(), skipAxis)) { + return false; + } + } + + // blocked dims // [dims via new_outer_order with auto pad] U [inner_blk_dims] - SizeVector ie_blk_dims(total_ndims, 0); + SizeVector blk_dims(total_ndims, 0); std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, - ie_blk_dims.end() - blk_desc.inner_nblks); - std::transform(outer_order.begin(), outer_order.end(), ie_blk_dims.begin(), + blk_dims.end() - blk_desc.inner_nblks); + std::transform(outer_order.begin(), outer_order.end(), blk_dims.begin(), [&] (size_t i) { return outer_block_dims[i]; }); - // IE offset padded to data. Same as for oneDNN - SizeVector ie_blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; - size_t ie_blk_offset0 = desc.data.offset0; + if (!dimsEqualWeak(blk_dims, rhs.getBlockDims())) { + return false; + } - // TODO: The tensor desc implementation allow to specify offset_to_data for inner blocked dims. + // offset padded to data. Same as for oneDNN + SizeVector blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; + // TODO: The BlockedMemoryDesc implementation allow to specify offset_to_data for inner blocked dims. // Which is not obvious behavior. It required offset_to_data.size == total_ndims, so will // fill it with zero. - ie_blk_offset_to_data.insert(ie_blk_offset_to_data.end(), inner_ndims, 0); + blk_offset_to_data.insert(blk_offset_to_data.end(), inner_ndims, 0); + if (!dimsEqualWeak(blk_offset_to_data, rhs.getOffsetPaddingToData())) { + return false; + } - - BlockingDesc ie_blk_desc { ie_blk_dims, - ie_blk_order, - ie_blk_offset0, - ie_blk_offset_to_data, - ie_blk_strides }; - TensorDesc res { - MKLDNNMemory::convertToIePrec(desc.data_type()), - SizeVector {begin(dims), end(dims)}, - ie_blk_desc }; - // TODO: BLOCKED is the most common layout which covers all other permute layout like NHWC. - // But for some cases we have to specify it more correctly.. may be.. or just keep - // auto detected layout in constructor of TensorDesc. - return res; + return dimsEqualWeak(desc.data.offset0, rhs.getOffsetPadding()); } -/** - * Construct from IE::TensorDesc - * @param tDesc - * - * IE IOhw_4i16o4i dims(N) = {32, 64, 128, 128} - * blockedDims {4, 2, 128, 128, 4, 16, 4} // total dims(inner, outermost, auto blocked/padded). Generally sorted by strides. - * strides {8388608, 4194304, 32768, 256, 64, 4, 1} // strides for blockedDims, growing sequence - * order {1, 0, 2, 3, 1, 0, 1} // matching to original dims - * - * All vectors blockedDims/strides/order have same size equals total num of internal blocked dims(inner_dims + outer_dims) - * - * Tensor descriptor filing is not deterministic. It allows any permutation of index which keeps order of - * real dims spliting. - * for {1, 0, 2, 3, 1, 0, 1} we can swap elements [1] <=> [4] - * but not [0]<=>[4] because it breacke spliting original dims into internal blocked dims - * Normalization of representation: Make strides growing but keep layout same as original. Not all - * layout allow us to meet normalize form of tensor desc. - * - * Limitation of conversion first N elements of order should be permutation of [0,1,2 ... N] - */ -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc): - desc({}, mkldnn::memory::data_type::undef, mkldnn::memory::format_tag::undef) { - auto dims = tDesc.getDims(); - - // TODO: implicit conversion of dims is no good... 
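// --- Editorial sketch (not part of the patch): a worked example of the blocked
// representation that isCompatible() above compares. Assuming a dense nChw16c
// tensor with logical dims {1, 64, 56, 56}:
//   order     = {0, 1, 2, 3, 1}              // outer dims by descending stride U inner_idxs
//   blockDims = {1, 4, 56, 56, 16}           // padded outer dims / block size U inner block
//   strides   = {200704, 50176, 896, 16, 1}  // dense strides over blockDims
// Because the batch size is 1, skipAxis == 0 and the batch stride is excluded
// from the comparison, so two otherwise identical descriptors that differ only
// in the (meaningless) batch stride are still reported as compatible.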
- if (tDesc.getLayout() == Layout::SCALAR) { - desc.data.format_kind = dnnl_blocked; - desc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - desc.data.ndims = 1; - desc.data.dims[0] = 1; - desc.data.padded_dims[0] = 1; - desc.data.format_desc.blocking.strides[0] = 1; - desc.data.padded_offsets[0] = 0; - desc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - return; - } - - if (tDesc.getLayout() == Layout::ANY) { - desc.data.format_kind = dnnl_format_kind_any; - desc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - desc.data.ndims = dims.size(); - std::copy(dims.begin(), dims.end(), desc.data.dims); - std::copy(dims.begin(), dims.end(), desc.data.padded_dims); - desc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - std::fill(desc.data.padded_offsets, desc.data.padded_offsets + dims.size(), 0); - return; - } - - auto ie_blkdDims = tDesc.getBlockingDesc().getBlockDims(); - auto ie_order = tDesc.getBlockingDesc().getOrder(); - auto ie_offsetsToData = tDesc.getBlockingDesc().getOffsetPaddingToData(); - auto ie_strides = tDesc.getBlockingDesc().getStrides(); - - size_t outer_ndims = dims.size(); - size_t inner_ndims = ie_order.size() - dims.size(); - - bool is_descending_strides = true; - for (int i = 1; i < ie_strides.size(); i++) { - is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); - } - - // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims - // and may be we can achieve correct "descending strides" form which allow conversion. - if (!is_descending_strides) - IE_THROW() << "Unsupported case for conversion"; - - std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension - for (size_t i = 0; i < outer_ndims; i++) { - outer_order[ie_order[i]] = i; - } - bool outer_is_correct_permutation_of_n = - std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); - - if (!outer_is_correct_permutation_of_n) - IE_THROW() << "Unsupported case for conversion"; - - bool inner_block_are_dense = one_of(ie_strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted - for (int i = outer_ndims; i < ie_strides.size() - 1; i++) { - inner_block_are_dense &= (ie_strides[i] == ie_strides[i+1] * ie_blkdDims[i+1]); - } - - if (!inner_block_are_dense) - IE_THROW() << "Unsupported case for conversion"; - - bool inner_pad_offsets_is_zero = std::all_of(ie_offsetsToData.begin() + outer_ndims, ie_offsetsToData.end(), - [](size_t pad) { return pad == 0; }); - - if (!inner_pad_offsets_is_zero) - IE_THROW() << "Unsupported case for conversion"; - - // Fill general memory desc fields - desc.data.format_kind = dnnl_blocked; - desc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - desc.data.ndims = dims.size(); - desc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - std::copy(dims.begin(), dims.end(), desc.data.dims); - std::copy(ie_offsetsToData.begin(), ie_offsetsToData.begin() + outer_ndims, desc.data.padded_offsets); - std::fill(desc.data.padded_dims, desc.data.padded_dims + outer_ndims, 1); - for (size_t i = 0; i < ie_order.size(); i++) { - auto idx = ie_order[i]; - desc.data.padded_dims[idx] *= ie_blkdDims[i]; - } - - // Fill blocking desc - auto &dnn_blk_desc = desc.data.format_desc.blocking; - dnn_blk_desc.inner_nblks = inner_ndims; - std::copy(ie_blkdDims.end() - inner_ndims, ie_blkdDims.end(), 
dnn_blk_desc.inner_blks); - std::copy(ie_order.end() - inner_ndims, ie_order.end(), dnn_blk_desc.inner_idxs); - for (size_t i = 0; i < outer_ndims; i++) { - dnn_blk_desc.strides[i] = ie_strides[outer_order[i]]; +bool MKLDNNMemoryDesc::hasLayoutType(LayoutType layoutType) const { + switch (layoutType) { + case LayoutType::ncsp: + return isPlainFormat(); + case LayoutType::nspc: + return isTailCFormat(); + case LayoutType::nCsp8c: + return isBlockedCFormat(8); + case LayoutType::nCsp16c: + return isBlockedCFormat(16); + default: + return false; } } -bool MKLDNNMemoryDesc::blocksExtended() const { - for (int i = 0; i < desc.data.ndims; i++) { - if (desc.data.dims[i] != desc.data.padded_dims[i]) - return true; +std::string MKLDNNMemoryDesc::serializeFormat() const { + if (desc.data.format_kind == dnnl_format_kind_wino) { + switch (desc.data.format_desc.wino_desc.wino_format) { + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOIoi: return "wino_aaOIoi"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOio: return "wino_aaOio"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOBiOo: return "wino_aaOBiOo"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_OBaaIBOIio: return "wino_OBaaIBOIio"; + default: return "wino_undef"; + } } - return false; + auto fmt = getFormat(); + return mkldnn::utils::fmt2str(fmt); +} + +bool MKLDNNMemoryDesc::isDefined() const { + return desc.data.offset0 != Shape::UNDEFINED_DIM; +} + +InferenceEngine::Precision MKLDNNMemoryDesc::getPrecision() const { + return MKLDNNExtensionUtils::DataTypeToIEPrecision(desc.data_type()); +} + +void MKLDNNMemoryDesc::setPrecision(InferenceEngine::Precision prc) { + desc.data.data_type = static_cast(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); } } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h index 5de42240dba..d4cf4fc634b 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h @@ -6,13 +6,18 @@ #include "ie_layouts.h" #include "mkldnn_dims.h" +#include "cpu_memory_desc.h" +#include "mkldnn_extension_utils.h" #include #include +#include +#include #include #include #include #include +#include /** * @file contains a concept classes to work with memory/tensor/blob abstractions on plugin level. @@ -34,20 +39,15 @@ namespace MKLDNNPlugin { * Represent internal plugin abstraction of tensor description * */ -class MKLDNNMemoryDesc { +class MKLDNNMemoryDesc : public MemoryDesc { public: - /** Empty constructor - doesn't define any tensor representation */ - MKLDNNMemoryDesc(): desc() {} - /** Construct a tensor desc with plain layout format (like ND C array) */ - MKLDNNMemoryDesc(const mkldnn::memory::dims& dims, mkldnn::memory::data_type dataType); + MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType); /** Construct a tensor desc with specified layout format tag. 
Any and Undef is not supported */ - MKLDNNMemoryDesc(const mkldnn::memory::dims& dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format); - - explicit MKLDNNMemoryDesc(const InferenceEngine::TensorDesc& tDesc); - explicit MKLDNNMemoryDesc(const mkldnn::memory::desc& desc): desc(desc) {} + MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format); + explicit MKLDNNMemoryDesc(const mkldnn::memory::desc& desc); /** * Try to define original format tag use on creation @@ -60,8 +60,6 @@ public: return static_cast(desc.data.data_type); } - size_t GetElementSize() const; - MKLDNNDims getDims() const { return MKLDNNDims(desc.data.dims, desc.data.ndims); } @@ -75,15 +73,38 @@ public: bool operator != (const MKLDNNMemoryDesc& rhs) const; operator mkldnn::memory::desc() const; - operator InferenceEngine::TensorDesc() const; + bool isSame(mkldnn::memory::format_tag fmt) const; + dnnl_format_kind_t getFormatKind() const { + return desc.data.format_kind; + } + + std::unique_ptr clone() const override { + return MKLDNNPlugin::make_unique(*this); + } + + bool hasLayoutType(LayoutType layoutType) const override; + + std::string serializeFormat() const override; + + bool isDefined() const override; + + InferenceEngine::Precision getPrecision() const override; + + void setPrecision(InferenceEngine::Precision prc) override; + + bool isCompatible(const MemoryDesc& rhs) const override; + bool isCompatible(const BlockedMemoryDesc& rhs) const; + bool isCompatible(const MKLDNNMemoryDesc& rhs) const; + +private: + size_t getElementOffset(size_t elemNumber) const override; + size_t getMemSizeImp() const override; bool isPlainFormat() const; bool isBlockedCFormat(size_t blk_size = UNREACHABLE_DIM) const; bool isTailCFormat() const; - bool isSame(mkldnn::memory::format_tag fmt) const; - private: static constexpr size_t UNREACHABLE_DIM = std::numeric_limits::max(); mkldnn::memory::desc desc; @@ -94,6 +115,12 @@ class MKLDNNMemory { public: explicit MKLDNNMemory(const mkldnn::engine& eng); + MKLDNNMemory(const MKLDNNMemory&) = delete; + MKLDNNMemory& operator= (const MKLDNNMemory&) = delete; + + MKLDNNMemory(MKLDNNMemory&&) = default; + MKLDNNMemory& operator= (MKLDNNMemory&&) = default; + const mkldnn::memory& GetPrimitive() const { return *prim; } @@ -106,10 +133,15 @@ public: return prim->get_desc(); } - const MKLDNNMemoryDesc GetDesc() const { - return MKLDNNMemoryDesc {prim->get_desc()}; + const MemoryDesc& GetDesc() const { + return *pMemDesc; } + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + T GetDescWithType() const; + /** * Return handler of buffer. Real data may starts from some other offset * @return @@ -126,12 +158,7 @@ public: * Like a GetData() but offset is applied. 
* @return */ - void* GetPtr() const { - auto ptr = static_cast(GetData()); - ptr += GetDescriptor().data.offset0 * GetDesc().GetElementSize(); - return ptr; - } - + void* GetPtr() const; mkldnn::memory::data_type GetDataType() const { return static_cast(GetDescriptor().data.data_type); @@ -145,19 +172,15 @@ public: return {std::begin(data.dims), std::begin(data.dims) + data.ndims}; } - void Create(const mkldnn::memory::dims& dims, mkldnn::memory::data_type data_type, mkldnn::memory::format_tag format, - const void* data = nullptr); - - void Create(const mkldnn::memory::desc& desc, const void* data = nullptr, bool pads_zeroing = true); + void Create(const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); // Like a plain format void SetData(mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format, const void* data, size_t size, bool ftz = true) const; void SetData(const MKLDNNMemory& memory, size_t size = 0, bool ftz = true) const; void FillZero(); - static mkldnn::memory::format_tag GetPlainFormat(const mkldnn::memory::dims& dims); + static mkldnn::memory::format_tag GetPlainFormatByRank(size_t rank); static InferenceEngine::Layout GetPlainLayout(const mkldnn::memory::dims& dims); - static bool isConsistant(const mkldnn::memory::dims& dims, mkldnn::memory::format_tag format); static mkldnn::memory::format_tag Convert(const InferenceEngine::Layout layout); static InferenceEngine::Precision convertToIePrec(mkldnn::memory::data_type dataType); static mkldnn::memory::data_type convertToDataType(const InferenceEngine::Precision &precision); @@ -167,6 +190,17 @@ public: static void reorderData(const MKLDNNMemory& input, const MKLDNNMemory& output, size_t size = 0); private: + void Create(const mkldnn::memory::dims& dims, mkldnn::memory::data_type data_type, mkldnn::memory::format_tag format, + const void* data = nullptr); + + void Create(const mkldnn::memory::desc& desc, const void* data = nullptr, bool pads_zeroing = true); + + const MKLDNNMemoryDesc GetMKLDNNDesc() const { + return MKLDNNMemoryDesc(prim->get_desc()); + } + +private: + MemoryDescPtr pMemDesc; std::shared_ptr prim; mkldnn::engine eng; }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h index aaddd7e4575..3cbe768370c 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h @@ -8,6 +8,7 @@ #include "blob_factory.hpp" #include "mkldnn_memory.h" #include "nodes/common/cpu_memcpy.h" +#include "cpu_memory_desc_utils.h" #include @@ -17,7 +18,7 @@ class MKLDNNVariableState : public InferenceEngine::IVariableStateInternal { public: MKLDNNVariableState(std::string name, MKLDNNMemoryPtr storage) : InferenceEngine::IVariableStateInternal{name} { - state = make_blob_with_precision(MKLDNNMemoryDesc(storage->GetDescriptor())); + state = make_blob_with_precision(MemoryDescUtils::convertToTensorDesc(storage->GetDesc())); state->allocate(); cpu_memcpy(state->buffer(), storage->GetData(), storage->GetSize()); } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index e46c7a7b0bd..7e29589caf9 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -55,6 +55,7 @@ #include "utils/general_utils.h" #include "utils/cpu_utils.hpp" #include "nodes/common/cpu_convert.h" +#include "cpu_memory_desc_utils.h" using namespace mkldnn; using 
namespace MKLDNNPlugin; @@ -72,6 +73,8 @@ static const InferenceEngine::details::caseless_unordered_map { "FullyConnected", FullyConnected }, { "MaxPool", Pooling }, { "AvgPool", Pooling }, + { "AdaptiveMaxPool", AdaptivePooling}, + { "AdaptiveAvgPool", AdaptivePooling}, { "Add", Eltwise }, { "Subtract", Eltwise }, { "Multiply", Eltwise }, @@ -223,7 +226,9 @@ static const InferenceEngine::details::caseless_unordered_map { "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator}, { "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage}, { "ExtractImagePatches", ExtractImagePatches}, - { "NonMaxSuppressionIEInternal", NonMaxSuppression} + { "NonMaxSuppressionIEInternal", NonMaxSuppression}, + { "MatrixNms", MatrixNms}, + { "MulticlassNms", MulticlassNms} }; Type TypeFromName(const std::string type) { @@ -248,20 +253,16 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::en type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) { algorithm = Algorithm::Undefined; fusingPort = -1; - const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name(); - for (size_t i = 0; i < op->get_input_size(); i++) { - if (op->get_input_partial_shape(i).is_dynamic()) - IE_THROW() << errorPrefix << " has dynamic input shape on " << i << " port, but CPU plug-in supports only static shape"; - } - for (size_t i = 0; i < op->get_output_size(); i++) { - if (op->get_output_partial_shape(i).is_dynamic()) - IE_THROW() << errorPrefix << " has dynamic output shape on " << i << " port, but CPU plug-in supports only static shape"; - } for (size_t i = 0; i < op->get_input_size(); i++) { - const auto &shape = op->get_input_shape(i); - inDims.emplace_back(ngraph::is_scalar(shape) ? ngraph::Shape{1} : shape); + const auto &shape = op->get_input_partial_shape(i); + + bool isScalar = false; + if (shape.rank().is_static()) { + isScalar = shape.rank().get_length() == 0; + } + inputShapes.emplace_back(isScalar ? ngraph::PartialShape{1} : shape); originalInputPrecisions.emplace_back(details::convertPrecision(op->get_input_element_type(i))); } @@ -270,8 +271,13 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::en IE_THROW() << "Node with type '" << typeStr << "' and name '" << name << "' does not have any outputs."; } for (size_t i = 0; i < op->get_output_size(); i++) { - const auto &shape = op->get_output_shape(i); - outDims.emplace_back(ngraph::is_scalar(shape) ? ngraph::Shape{1} : shape); + const auto &shape = op->get_output_partial_shape(i); + + bool isScalar = false; + if (shape.rank().is_static()) { + isScalar = shape.rank().get_length() == 0; + } + outputShapes.emplace_back(isScalar ? 
ngraph::PartialShape{1} : shape); originalOutputPrecisions.emplace_back(details::convertPrecision(op->get_output_element_type(i))); } } @@ -418,9 +424,10 @@ void MKLDNNNode::selectPreferPrimitiveDescriptor(const std::vector= parent_spd->getConfig().outConfs.size()) { inNum = 0; } - if (MKLDNNExtensionUtils::initTensorsAreEqual( - getSupportedPrimitiveDescriptors()[i].getConfig().inConfs[j].desc, - parent_spd->getConfig().outConfs[inNum].desc)) { + auto& curDesc = getSupportedPrimitiveDescriptors()[i].getConfig().inConfs[j].desc; + auto& parentDesc = parent_spd->getConfig().outConfs[inNum].desc; + + if (curDesc->isCompatible(*parentDesc)) { equalsLocalFormatCount++; } } @@ -455,9 +462,9 @@ bool MKLDNNNode::canBeInPlace() const { return false; } - MKLDNNDims dims = getParentEdgeAt(0)->getDims(); + auto inShape = getParentEdgeAt(0)->getShape(); for (size_t cIdx = 0; cIdx < getChildEdges().size(); cIdx++) { - if (getChildEdgeAt(cIdx)->getDims() != dims) { + if (getChildEdgeAt(cIdx)->getShape() != inShape) { return false; } } @@ -465,7 +472,7 @@ bool MKLDNNNode::canBeInPlace() const { } void MKLDNNNode::resolveNotAllocatedEdges() { - const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor(); + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (!selected_pd) IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) { @@ -476,7 +483,7 @@ void MKLDNNNode::resolveNotAllocatedEdges() { auto * memPtr = reinterpret_cast(parentEdge->getMemory().GetData()); parentEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine())); - parentEdge->getMemoryPtr()->Create(MKLDNNMemoryDesc(selected_pd->getConfig().inConfs[i].desc), memPtr); + parentEdge->getMemoryPtr()->Create(*selected_pd->getConfig().inConfs[i].desc, memPtr); parentEdge->changeStatus(MKLDNNEdge::Status::Allocated); } @@ -488,7 +495,7 @@ void MKLDNNNode::resolveNotAllocatedEdges() { auto * memPtr = reinterpret_cast(childEdge->getMemory().GetData()); childEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine())); - childEdge->getMemoryPtr()->Create(MKLDNNMemoryDesc(selected_pd->getConfig().outConfs[i].desc), memPtr); + childEdge->getMemoryPtr()->Create(*selected_pd->getConfig().outConfs[i].desc, memPtr); childEdge->changeStatus(MKLDNNEdge::Status::Allocated); } @@ -543,14 +550,14 @@ std::string MKLDNNNode::getPrimitiveDescriptorType() { // it is mixed precision. 
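// --- Editorial example (hypothetical values): for a selected descriptor whose
// first input config is FP32 the reported type string becomes e.g.
// "jit_avx512_FP32", while a U8 input keeps the historical "_I8" suffix,
// e.g. "jit_avx512_I8". The only change in this hunk is that the precision is
// now read through the polymorphic descriptor pointer
// (inConfs[0].desc->getPrecision() instead of inConfs[0].desc.getPrecision()).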
if (selectedPrimitiveDesc) { if (!selectedPrimitiveDesc->getConfig().inConfs.empty()) { - if (selectedPrimitiveDesc->getConfig().inConfs[0].desc.getPrecision() != InferenceEngine::Precision::U8) { - str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().inConfs[0].desc.getPrecision().name()); + if (selectedPrimitiveDesc->getConfig().inConfs[0].desc->getPrecision() != InferenceEngine::Precision::U8) { + str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().inConfs[0].desc->getPrecision().name()); } else { str_type += "_I8"; } } else { - if (selectedPrimitiveDesc->getConfig().outConfs[0].desc.getPrecision() != InferenceEngine::Precision::U8) { - str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().outConfs[0].desc.getPrecision().name()); + if (selectedPrimitiveDesc->getConfig().outConfs[0].desc->getPrecision() != InferenceEngine::Precision::U8) { + str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().outConfs[0].desc->getPrecision().name()); } else { str_type += "_I8"; } @@ -579,7 +586,7 @@ const MKLDNNEdgePtr MKLDNNNode::getChildEdgeAt(size_t idx) const { } const std::vector MKLDNNNode::getParentEdgesAtPort(size_t idx) const { - if (idx >= inDims.size()) + if (idx >= inputShapes.size()) IE_THROW() << "Node " << getName() << " contains less input ports than " << idx; std::vector res; @@ -593,7 +600,7 @@ const std::vector MKLDNNNode::getParentEdgesAtPort(size_t idx) co } const std::vector MKLDNNNode::getChildEdgesAtPort(size_t idx) const { - if (idx >= outDims.size()) + if (idx >= outputShapes.size()) IE_THROW() << "Node " << getName() << " contains less output ports than " << idx; std::vector res; @@ -607,18 +614,18 @@ const std::vector MKLDNNNode::getChildEdgesAtPort(size_t idx) con } -std::vector MKLDNNNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const { - if (dims.ndims() == 0) +std::vector MKLDNNNode::getAvailableFormatsForDims(const Shape &dims) const { + if (dims.getRank() == 0) return {memory::format_tag::x}; - else if (dims.ndims() == 1) + else if (dims.getRank() == 1) return {memory::format_tag::x}; - else if (dims.ndims() == 2) + else if (dims.getRank() == 2) return {memory::format_tag::nc}; - else if (dims.ndims() == 3) + else if (dims.getRank() == 3) return {memory::format_tag::tnc, memory::format_tag::ntc}; - else if (dims.ndims() == 4) + else if (dims.getRank() == 4) return {memory::format_tag::nchw, memory::format_tag::nChw8c, memory::format_tag::nChw16c}; - else if (dims.ndims() == 5) + else if (dims.getRank() == 5) return {memory::format_tag::ncdhw, memory::format_tag::nCdhw8c, memory::format_tag::nCdhw16c}; return {memory::format_tag::any}; } @@ -637,22 +644,22 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() { auto itpd = desc.createPrimitiveDescriptorIterator(engine); while (static_cast(itpd)) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getSrcMemDesc(itpd, i)); - config.inConfs.push_back(dataConfig); + PortConfig portConfig; + portConfig.inPlace = -1; + portConfig.constant = false; + portConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getSrcMemDesc(itpd, i)); + config.inConfs.push_back(portConfig); } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = canBeInPlace() ? 
0 : -1; - dataConfig.constant = false; - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getDstMemDesc(itpd, i)); - config.outConfs.push_back(dataConfig); + PortConfig portConfig; + portConfig.inPlace = canBeInPlace() ? 0 : -1; + portConfig.constant = false; + portConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getDstMemDesc(itpd, i)); + config.outConfs.push_back(portConfig); } impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); @@ -665,15 +672,12 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() { void MKLDNNNode::filterSupportedPrimitiveDescriptors() { // Compare by partial layout descriptor (without particular strides values) - auto areCompatible = [](const TensorDesc& tdesc, mkldnn::memory::format_tag fmt) { - TensorDesc fmt_tdesc = MKLDNNMemoryDesc{ - MKLDNNDims(tdesc.getDims()), - MKLDNNExtensionUtils::IEPrecisionToDataType(tdesc.getPrecision()), - fmt}; + auto areCompatible = [](const MemoryDesc& desc, mkldnn::memory::format_tag fmt) -> bool { + MKLDNNMemoryDesc fmt_tdesc = MKLDNNMemoryDesc{desc.getShape().getStaticDims(), + MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), + fmt}; - auto tmp_partial_tdesc = PartialBlkDesc::extractFrom(fmt_tdesc); - auto actual_partial_tdesc = PartialBlkDesc::extractFrom(tdesc); - return tmp_partial_tdesc == actual_partial_tdesc; + return desc.isCompatible(fmt_tdesc); }; if (!inputMemoryFormatsFilter.empty() || !outputMemoryFormatsFilter.empty()) { @@ -685,11 +689,11 @@ void MKLDNNNode::filterSupportedPrimitiveDescriptors() { bool isSuitableDesc = true; for (int i = 0; i < inputMemoryFormatsFilter.size(); i++) { - const bool matched = areCompatible(config.inConfs[i].desc, inputMemoryFormatsFilter[i]); + const bool matched = areCompatible(*config.inConfs[i].desc, inputMemoryFormatsFilter[i]); isSuitableDesc &= matched; } for (int i = 0; i < outputMemoryFormatsFilter.size(); i++) { - const bool matched = areCompatible(config.outConfs[i].desc, outputMemoryFormatsFilter[i]); + const bool matched = areCompatible(*config.outConfs[i].desc, outputMemoryFormatsFilter[i]); isSuitableDesc &= matched; } if (!isSuitableDesc) { @@ -701,22 +705,22 @@ void MKLDNNNode::filterSupportedPrimitiveDescriptors() { } } -void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { +void MKLDNNNode::initDescriptor(const NodeConfig& config) { auto* selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) { return; } - std::vector inDescs; + std::vector inDescs; for (const auto& inConf : config.inConfs) - inDescs.push_back(inConf.desc); - std::vector outDescs; + inDescs.push_back(inConf.desc.get()); + std::vector outDescs; for (const auto& outConf : config.outConfs) - outDescs.push_back(outConf.desc); - createDescriptor({inDescs}, {outDescs}); + outDescs.push_back(outConf.desc.get()); + createDescriptor(inDescs, outDescs); std::shared_ptr attr = initPrimitiveAttr(); - InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig(); + NodeConfig rightConfig = selectedPD->getConfig(); size_t selected_count = 0; for (size_t j = 0; j < descs.size(); j++) { const auto &desc = descs[j]; @@ -727,10 +731,10 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { itpd = desc.createPrimitiveDescriptorIterator(engine, *(attr.get())); } while (static_cast(itpd)) { - InferenceEngine::LayerConfig cfg; + NodeConfig cfg; cfg.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = 
canBeInPlace() ? 0 : -1; dataConfig.constant = false; dataConfig.desc = getSrcMemDesc(itpd, i); @@ -738,7 +742,7 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; dataConfig.desc = getDstMemDesc(itpd, i); @@ -768,23 +772,21 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { return; for (size_t i = 0; i < selectedConfig.inConfs.size(); i++) { - if (selectedConfig.inConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY && - !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.inConfs[i].desc, config.inConfs[i].desc)) + if (!selectedConfig.inConfs[i].desc->isCompatible(*config.inConfs[i].desc)) IE_THROW() << "Incorrect descriptor for node: " << getName(); } for (size_t i = 0; i < selectedConfig.outConfs.size(); i++) { - if (selectedConfig.outConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY && - !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.outConfs[i].desc, config.outConfs[i].desc)) + if (!selectedConfig.outConfs[i].desc->isCompatible(*config.outConfs[i].desc)) IE_THROW() << "Incorrect descriptor for node: " << getName(); } rightConfig = config; } - selectedPD->getConfig() = rightConfig; + selectedPD->setConfig(rightConfig); } -void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::primitive_desc_iterator& itpd) { +void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd) { for (size_t i = 0; i < getChildEdges().size(); i++) { auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) @@ -806,7 +808,8 @@ void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::pri const auto &internalBlob = internalBlobs[i]; auto create = [&] () { - auto newDesc = MKLDNNMemoryDesc(internalBlob->getTensorDesc()); + // TODO [DS]: internal blobs should be removed or rewritten using Memory object + auto newDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(internalBlob->getTensorDesc()); MKLDNNMemory memory{ engine }; memory.Create(newDesc, internalBlob->buffer()); @@ -947,119 +950,60 @@ const std::vector& MKLDNNNode::getPrimitivesPriority() { return implPriorities; } -bool MKLDNNNode::isUninitTensorDesc(const InferenceEngine::TensorDesc& desc) const { - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return true; - - if (desc.getBlockingDesc().getOffsetPadding() == std::numeric_limits::max()) - return true; - - for (size_t i = 0; i < desc.getBlockingDesc().getOrder().size(); i++) { - if (desc.getBlockingDesc().getOffsetPaddingToData()[i] == std::numeric_limits::max() || - desc.getBlockingDesc().getStrides()[i] == std::numeric_limits::max()) - return true; - } - - return false; -} - -InferenceEngine::TensorDesc MKLDNNNode::getConfiguredInputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const { - if (!isUninitTensorDesc(config.inConfs[idx].desc)) - return config.inConfs[idx].desc; - +std::unique_ptr MKLDNNNode::getDefinedInputDesc(const NodeConfig &config, size_t idx) const { int num = getParentEdgeAt(idx)->getInputNum(); auto *selectedPD = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor(); if (!selectedPD) IE_THROW() << "Cannot get selected primitive descriptor for node: " << getParentEdgeAt(idx)->getParent()->getName(); - if (selectedPD->getConfig().outConfs.size() <= num) - num = 0; + 
if (config.inConfs[idx].desc->isDefined()) { + return config.inConfs[idx].desc->clone(); + } if (config.inConfs[idx].inPlace >= 0) { - return getConfiguredOutputDesc(config, static_cast(config.inConfs[idx].inPlace)); + return getDefinedOutputDesc(config, static_cast(config.inConfs[idx].inPlace)); } if (num >= 0) { auto parentConf = selectedPD->getConfig().outConfs[num]; - parentConf.desc.setPrecision(config.inConfs[idx].desc.getPrecision()); - if (isUninitTensorDesc(parentConf.desc) && parentConf.inPlace >= 0) + parentConf.desc->setPrecision(config.inConfs[idx].desc->getPrecision()); + if (!parentConf.desc->isDefined() && parentConf.inPlace >= 0) getParentEdgeAt(idx)->getParent()->initOptimalPrimitiveDescriptor(); parentConf = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num]; - if (!isUninitTensorDesc(parentConf.desc) && - MKLDNNExtensionUtils::initTensorsAreEqual(parentConf.desc, config.inConfs[idx].desc)) { - return parentConf.desc; - } - - if (config.inConfs[idx].desc.getLayout() == InferenceEngine::Layout::ANY && - parentConf.desc.getLayout() != InferenceEngine::Layout::ANY) { - return InferenceEngine::TensorDesc(parentConf.desc.getPrecision(), - parentConf.desc.getDims(), { - parentConf.desc.getBlockingDesc().getBlockDims(), - parentConf.desc.getBlockingDesc().getOrder() - }); + if (parentConf.desc->isDefined() && parentConf.desc->isCompatible(*config.inConfs[idx].desc)) { + return parentConf.desc->clone(); } } - if (config.inConfs[idx].desc.getLayout() != InferenceEngine::Layout::ANY) { - return InferenceEngine::TensorDesc(config.inConfs[idx].desc.getPrecision(), - config.inConfs[idx].desc.getDims(), { - config.inConfs[idx].desc.getBlockingDesc().getBlockDims(), - config.inConfs[idx].desc.getBlockingDesc().getOrder() - }); - } - - return InferenceEngine::TensorDesc(config.inConfs[idx].desc.getPrecision(), - config.inConfs[idx].desc.getDims(), - InferenceEngine::TensorDesc::getLayoutByDims(config.inConfs[idx].desc.getDims())); + return MemoryDescUtils::resetOffset(config.inConfs[idx].desc.get()); } -InferenceEngine::TensorDesc MKLDNNNode::getConfiguredOutputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const { - if (!isUninitTensorDesc(config.outConfs[idx].desc)) - return config.outConfs[idx].desc; - +std::unique_ptr MKLDNNNode::getDefinedOutputDesc(const NodeConfig &config, size_t idx) const { int num = getChildEdgeAt(idx)->getOutputNum(); auto *selectedPD = getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor(); if (!selectedPD) IE_THROW() << "Cannot get selected primitive descriptor for node: " << getChildEdgeAt(idx)->getChild()->getName(); - if (selectedPD->getConfig().inConfs.size() <= num) - num = 0; + if (config.outConfs[idx].desc->isDefined()) { + return config.outConfs[idx].desc->clone(); + } if (config.outConfs[idx].inPlace >= 0) { - return getConfiguredInputDesc(config, static_cast(config.outConfs[idx].inPlace)); + return getDefinedInputDesc(config, static_cast(config.outConfs[idx].inPlace)); } if (num >= 0) { auto childConf = selectedPD->getConfig().inConfs[num]; - childConf.desc.setPrecision(config.outConfs[idx].desc.getPrecision()); - if (isUninitTensorDesc(childConf.desc) && childConf.inPlace >= 0) + childConf.desc->setPrecision(config.outConfs[idx].desc->getPrecision()); + if (!childConf.desc->isDefined() && childConf.inPlace >= 0) getChildEdgeAt(idx)->getChild()->initOptimalPrimitiveDescriptor(); childConf = 
getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num]; - if (!isUninitTensorDesc(childConf.desc) && - MKLDNNExtensionUtils::initTensorsAreEqual(childConf.desc, config.outConfs[idx].desc)) { - return childConf.desc; - } - if (config.outConfs[idx].desc.getLayout() == InferenceEngine::Layout::ANY && - childConf.desc.getLayout() != InferenceEngine::Layout::ANY) { - return InferenceEngine::TensorDesc(childConf.desc.getPrecision(), - childConf.desc.getDims(), { - childConf.desc.getBlockingDesc().getBlockDims(), - childConf.desc.getBlockingDesc().getOrder() - }); + if (childConf.desc->isDefined() && childConf.desc->isCompatible(*config.outConfs[idx].desc)) { + return childConf.desc->clone(); } } - if (config.outConfs[idx].desc.getLayout() != InferenceEngine::Layout::ANY) { - return InferenceEngine::TensorDesc(config.outConfs[idx].desc.getPrecision(), - config.outConfs[idx].desc.getDims(), { - config.outConfs[idx].desc.getBlockingDesc().getBlockDims(), - config.outConfs[idx].desc.getBlockingDesc().getOrder() - }); - } - - return InferenceEngine::TensorDesc(config.outConfs[idx].desc.getPrecision(), - config.outConfs[idx].desc.getDims(), - InferenceEngine::TensorDesc::getLayoutByDims(config.outConfs[idx].desc.getDims())); + return MemoryDescUtils::resetOffset(config.outConfs[idx].desc.get()); } void MKLDNNNode::initOptimalPrimitiveDescriptor() { @@ -1067,17 +1011,13 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; auto config = selected_pd->getConfig(); - if (!isInitConfig(config)) { + if (!isConfigDefined(config)) { for (size_t i = 0; i < config.inConfs.size(); i++) { - // TensorDescriptor constructor which is called inside getConfiguredInputDesc incorrectly computes offset field. - // What's why MKLDNNMemoryDesc routine is used to reinitialize TD with expected offset values. - config.inConfs[i].desc = MKLDNNMemoryDesc(getConfiguredInputDesc(config, i)); + config.inConfs[i].desc = getDefinedInputDesc(config, i); } for (size_t i = 0; i < config.outConfs.size(); i++) { - // TensorDescriptor constructor which is called inside getConfiguredOutputDesc incorrectly computes offset field. - // What's why MKLDNNMemoryDesc routine is used to reinitialize TD with expected offset values. 
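// --- Editorial note: resolution order implemented by the new getDefinedInputDesc /
// getDefinedOutputDesc above (which replace getConfiguredInputDesc/-OutputDesc):
//   1. the port's own desc, when it is already fully defined        -> clone()
//   2. an in-place peer port (inPlace >= 0)                         -> resolve that port instead
//   3. the connected node's selected output/input desc, when it is
//      defined and compatible with the requested one                -> clone()
//   4. otherwise fall back to the original desc with its offset
//      reset via MemoryDescUtils::resetOffset().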
- config.outConfs[i].desc = MKLDNNMemoryDesc(getConfiguredOutputDesc(config, i)); + config.outConfs[i].desc = getDefinedOutputDesc(config, i); } initDescriptor(config); @@ -1086,38 +1026,22 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() { } } -bool MKLDNNNode::isInitConfig(const InferenceEngine::LayerConfig& config) const { +bool MKLDNNNode::isConfigDefined(const NodeConfig &config) const { for (const auto& configs : {config.inConfs, config.outConfs}) { for (const auto &dc : configs) { - if (isUninitTensorDesc(dc.desc)) + if (!dc.desc->isDefined()) return false; } } return true; } -MKLDNNMemoryDesc MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - else - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); +std::unique_ptr MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + return MKLDNNPlugin::make_unique(primitive_desc_it.src_desc(idx)); } -MKLDNNMemoryDesc MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)); - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - else - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); +std::unique_ptr MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + return MKLDNNPlugin::make_unique(primitive_desc_it.dst_desc(idx)); } int MKLDNNNode::batchToProcess() { @@ -1126,15 +1050,15 @@ int MKLDNNNode::batchToProcess() { int MKLDNNNode::getMaxBatch() { // FIXME: batch != 0 dims number - if (!inDims.empty()) { - if (inDims[0].ndims()) - return inDims[0][0]; + if (!inputShapes.empty()) { + if (inputShapes[0].getRank()) + return static_cast(inputShapes[0].getStaticDims()[0]); else return 1; } - if (!outDims.empty() && outDims[0].ndims()) { - if (outDims[0].ndims()) - return outDims[0][0]; + if (!outputShapes.empty()) { + if (outputShapes[0].getRank()) + return static_cast(outputShapes[0].getStaticDims()[0]); else return 1; } @@ -1323,12 +1247,12 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const } const auto isBroadcastableToDataInput = [&]() { - const auto dataShape = getParentEdgeAt(fusingPort)->getDims().ToSizeVector(); + const auto dataShape = getParentEdgeAt(fusingPort)->getShape().getStaticDims(); for (size_t i = 0; i < getParentEdges().size(); i++) { if (i == fusingPort) continue; - auto weightShape = getParentEdgeAt(i)->getDims().ToSizeVector(); - if (!isPerTensorOrPerChannelBroadcastable(dataShape, weightShape)) + auto weightShape = getParentEdgeAt(i)->getShape().getStaticDims(); + if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape)) return false; } return true; @@ -1351,7 +1275,11 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) 
const bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const { if (node->getType() == FakeQuantize) { - return node->getAlgorithm() != FQBinarization; + bool ret = node->getAlgorithm() != FQBinarization; + for (size_t i = 1; i < node->getParentEdges().size(); i++) { + ret &= node->getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() == 1; + } + return ret; } else if (node->getType() == Eltwise) { return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, @@ -1396,7 +1324,7 @@ void MKLDNNNode::fillScalesAndShifts(const MKLDNNNode *parentNode, std::vector(outDims[0][outDims[0].ndims() > 1 ? 1 : 0]); + const size_t bufferSize = static_cast(outputShapes[0].getStaticDims()[outputShapes[0].getRank() > 1 ? 1 : 0]); if (align == -1) { align = bufferSize; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 29618d51fdb..77dab59e904 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -26,8 +26,10 @@ #include #include #include -#include +#include #include "cpu_types.h" +#include "cpu_shape.h" +#include "cpu_memory_desc.h" namespace MKLDNNPlugin { @@ -54,6 +56,8 @@ static std::string NameFromType(Type type) { return "Lrn"; case Pooling: return "Pooling"; + case AdaptivePooling: + return "AdaptivePooling"; case FullyConnected: return "FullyConnected"; case MatMul: @@ -192,89 +196,101 @@ static std::string NameFromType(Type type) { return "ExtractImagePatches"; case NonMaxSuppression: return "NonMaxSuppression"; + case MatrixNms: + return "MatrixNms"; + case MulticlassNms: + return "MulticlassNms"; default: return "Unknown"; } } -class PrimitiveDescInfo { +class PortConfigurator { public: - PrimitiveDescInfo(const InferenceEngine::LayerConfig& conf, impl_desc_type type): config(conf) { + PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc, const Shape& shape, + bool constant = false, int inPlace = -1) : + blockedDescCreator(getBlockedDescCreator(blockedDescType)), prc(prc), shape(shape), constant(constant), inPlace(inPlace) {} + + PortConfigurator(MKLDNNPlugin::LayoutType blockedDescType, InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED, + bool constant = false, int inPlace = -1) : + blockedDescCreator(getBlockedDescCreator(blockedDescType)), prc(prc), constant(constant), inPlace(inPlace) {} + + MKLDNNPlugin::BlockedDescCreator::CreatorConstPtr blockedDescCreator; + const InferenceEngine::Precision prc; + const Shape shape; + bool constant = false; + int inPlace = -1; + +private: + static MKLDNNPlugin::BlockedDescCreator::CreatorConstPtr getBlockedDescCreator(MKLDNNPlugin::LayoutType blockedDescType) { + auto& creators = MKLDNNPlugin::BlockedDescCreator::getCommonCreators(); + if (creators.find(blockedDescType) == creators.end()) { + IE_THROW() << "Cannot find tensor descriptor creator"; + } + return creators.at(blockedDescType); + } +}; + +struct PortConfig { + PortConfig() = default; + + PortConfig(const PortConfig& rhs) { + this->constant = rhs.constant; + this->inPlace = rhs.inPlace; + if (rhs.desc) { + this->desc = rhs.desc->clone(); + } + } + + PortConfig& operator=(const PortConfig& rhs) { + this->constant = rhs.constant; + this->inPlace = rhs.inPlace; + if (rhs.desc) { + this->desc = rhs.desc->clone(); + } + return *this; + } + + 
PortConfig(PortConfig&& rhs) = default; + PortConfig& operator=(PortConfig&& rhs) = default; + + // TODO [DS]: better to make private and const + bool constant = false; + int inPlace = -1; + std::unique_ptr desc; +}; + +struct NodeConfig { + bool dynBatchSupport = false; + std::vector inConfs; + std::vector outConfs; +}; + +class NodeDesc { +public: + NodeDesc(const NodeConfig& conf, impl_desc_type type): config(conf) { implementationType = type; } - PrimitiveDescInfo(const InferenceEngine::LayerConfig& conf, impl_desc_type type, const std::vector& outFmts): config(conf) { - implementationType = type; - outputLayouts = outFmts; - } - - PrimitiveDescInfo(const InferenceEngine::LayerConfig& conf, impl_desc_type type, mkldnn::memory::format_tag outFmt): config(conf) { - implementationType = type; - - setOutputLayouts(outFmt); - } - - PrimitiveDescInfo(const PrimitiveDescInfo &descInfo) = default; - PrimitiveDescInfo(PrimitiveDescInfo &&descInfo) = default; - - PrimitiveDescInfo &operator=(const PrimitiveDescInfo &descInfo) = default; - - const InferenceEngine::LayerConfig getConfig() const { + const NodeConfig& getConfig() const { return config; } - InferenceEngine::LayerConfig& getConfig() { - return config; + + void setConfig(const NodeConfig& config) { + this->config = config; } impl_desc_type getImplementationType() const { return implementationType; } - const std::vector& getOutputLayouts() const { - return outputLayouts; - } - void setImplementationType(impl_desc_type type) { implementationType = type; } - void setOutputLayouts(mkldnn::memory::format_tag outFmt) { - outputLayouts.clear(); - - for (int i = 0; i < config.outConfs.size(); i++) { - outputLayouts.push_back(outFmt); - } - } - private: - InferenceEngine::LayerConfig config; + NodeConfig config; impl_desc_type implementationType; - std::vector outputLayouts; -}; - -class DataConfigurator { -public: - DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, InferenceEngine::Precision prc, const InferenceEngine::SizeVector& shape, - bool constant = false, int inplace = -1) : - tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), shape(shape), constant(constant), inplace(inplace) {} - - DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED, - bool constant = false, int inplace = -1) : - tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), shape({}), constant(constant), inplace(inplace) {} - - const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr tensorDescCreator; - const InferenceEngine::Precision prc = InferenceEngine::Precision::UNSPECIFIED; - const InferenceEngine::SizeVector shape; - const bool constant = false; - const int inplace = -1; -private: - static MKLDNNPlugin::TensorDescCreator::CreatorConstPtr getTensorDescCreator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType) { - auto& creators = MKLDNNPlugin::TensorDescCreator::getCommonCreators(); - if (creators.find(tensorDescType) == creators.end()) { - IE_THROW() << "Cannot find tensor descriptor creator"; - } - return creators.at(tensorDescType); - } }; class MKLDNNNode { @@ -420,18 +436,18 @@ public: return type; } - const std::vector& getSupportedPrimitiveDescriptors() const { + const std::vector& getSupportedPrimitiveDescriptors() const { return supportedPrimitiveDescriptors; } - inline const PrimitiveDescInfo* getSelectedPrimitiveDescriptor() const { + inline const NodeDesc* getSelectedPrimitiveDescriptor() const { if 
(selectedPrimitiveDescriptorIndex < 0 || selectedPrimitiveDescriptorIndex >= supportedPrimitiveDescriptors.size()) return nullptr; return &supportedPrimitiveDescriptors[selectedPrimitiveDescriptorIndex]; } - inline PrimitiveDescInfo* getSelectedPrimitiveDescriptor() { + inline NodeDesc* getSelectedPrimitiveDescriptor() { if (selectedPrimitiveDescriptorIndex < 0 || selectedPrimitiveDescriptorIndex >= supportedPrimitiveDescriptors.size()) return nullptr; @@ -467,9 +483,10 @@ public: virtual void initOptimalPrimitiveDescriptor(); virtual void getSupportedDescriptors() = 0; - virtual void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) {} - virtual void initDescriptor(const InferenceEngine::LayerConfig& config); + // TODO [DS]: Should be moved into Node derivative class + virtual void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) {} + virtual void initDescriptor(const NodeConfig& config); virtual bool created() const = 0; virtual bool created(const MKLDNNExtensionManager::Ptr& extMgr) { return created(); @@ -483,23 +500,19 @@ public: template PD createPrimitiveDescriptor(const mkldnn::primitive_attr &attr = mkldnn::primitive_attr()) { - auto descsEqual = [](const std::vector& srcDescs, - const std::vector& selectedDescs) { + auto descsCompatible = [](const std::vector& srcDescs, + const std::vector& selectedDescs) { if (srcDescs.empty() && selectedDescs.empty()) return true; if (srcDescs.empty() || selectedDescs.empty()) return false; for (size_t i = 0; i < srcDescs.size() && i < selectedDescs.size(); i++) { - if (!(srcDescs[i].getBlockingDesc() == selectedDescs[i].desc.getBlockingDesc() && - srcDescs[i].getPrecision() == selectedDescs[i].desc.getPrecision() && - srcDescs[i].getDims() == selectedDescs[i].desc.getDims()) && - srcDescs[i].getLayout() != InferenceEngine::Layout::ANY) - return false; + return srcDescs[i]->isCompatible(*selectedDescs[i].desc); } return true; }; - const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor(); + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; @@ -507,19 +520,19 @@ public: auto itpd = desc.createPrimitiveDescriptorIterator(engine, attr); while (static_cast(itpd)) { - std::vector srcDescs; + std::vector srcDescs; for (size_t i = 0; i < descInputNumbers(desc); i++) srcDescs.push_back(getSrcMemDesc(itpd, i)); - std::vector dstDescs; + std::vector dstDescs; for (size_t i = 0; i < descOutputNumbers(desc); i++) dstDescs.push_back(getDstMemDesc(itpd, i)); impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); if (impl_type == selected_pd->getImplementationType() && - descsEqual(srcDescs, selected_pd->getConfig().inConfs) && - descsEqual(dstDescs, selected_pd->getConfig().outConfs)) { + descsCompatible(srcDescs, selected_pd->getConfig().inConfs) && + descsCompatible(dstDescs, selected_pd->getConfig().outConfs)) { prepareMemory(selected_pd, itpd); PD prim_desc = createPd(desc); return {itpd.get()}; @@ -646,10 +659,10 @@ protected: virtual int getMaxBatch(); - virtual InferenceEngine::TensorDesc getConfiguredInputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const; - virtual InferenceEngine::TensorDesc getConfiguredOutputDesc(const InferenceEngine::LayerConfig& config, size_t idx) const; - virtual MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); - virtual MKLDNNMemoryDesc 
getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); + virtual std::unique_ptr getDefinedInputDesc(const NodeConfig &config, size_t idx) const; + virtual std::unique_ptr getDefinedOutputDesc(const NodeConfig &config, size_t idx) const; + virtual std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); + virtual std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); /** * @brief Appends new item into ops list with the information on how the node should be executed as post operation. @@ -663,8 +676,8 @@ protected: GetPrimitiveMemoryFormatFunc; std::vector internalBlobDesc; - std::vector inDims; - std::vector outDims; + std::vector inputShapes; + std::vector outputShapes; std::vector fusedWith; std::vector mergedWith; @@ -689,12 +702,11 @@ protected: ConstantType constant = ConstantType::Unknown; std::vector internalBlobs; std::vector internalBlobMemory; - std::vector supportedPrimitiveDescriptors; + std::vector supportedPrimitiveDescriptors; std::unordered_map primArgs; MKLDNNPrimitive prim; std::vector descs; - InferenceEngine::Blob::Ptr ext_scales; MKLDNNWeightsSharing::Ptr weightCache; Algorithm algorithm = Algorithm::Undefined; @@ -706,14 +718,13 @@ protected: friend class MKLDNNGraphOptimizer; friend class NodeDumper; - bool isUninitTensorDesc(const InferenceEngine::TensorDesc& desc) const; - bool isInitConfig(const InferenceEngine::LayerConfig& config) const; void selectPreferPrimitiveDescriptor(const std::vector& priority, bool ignoreConstInputs); + bool isConfigDefined(const NodeConfig &config) const; virtual bool canBeInPlace() const; virtual const std::vector& getPrimitivesPriority(); - virtual std::vector getAvailableFormatsForDims(const MKLDNNDims& dims) const; + virtual std::vector getAvailableFormatsForDims(const Shape& dims) const; int batchToProcess(); InferenceEngine::Layout getWeightsLayoutByDims(InferenceEngine::SizeVector dims, bool isGrouped); @@ -730,42 +741,39 @@ protected: */ virtual std::vector getOutputPrecisions() const; - void addSupportedPrimDesc(const std::vector& inDataConfigurators, - const std::vector& outDataConfigurators, + void addSupportedPrimDesc(const std::vector& inPortConfigs, + const std::vector& outPortConfigs, impl_desc_type implType, bool dynBatchSupport = false) { - auto fill_port = [] (const DataConfigurator& dataConfigurator, const InferenceEngine::SizeVector& dims, - InferenceEngine::Precision prc, std::vector& port) -> bool { - // In order to simplify particular node initialization logic we just don't add config in case target shape is not supported by tensorDescCreator. - // This should be suitable for major of scenarios since almost all nodes add `ncsp` tensorDescCreator which supports any shape rank. - if (dims.size() < dataConfigurator.tensorDescCreator->getMinimalRank()) + auto fill_port = [] (const PortConfigurator& portConfigurator, const Shape& shape, + InferenceEngine::Precision prc, std::vector& port) -> bool { + // In order to simplify particular node initialization logic we just don't add config in case target shape is not supported by blockedDescCreator. + // This should be suitable for major of scenarios since almost all nodes add `ncsp` blockedDescCreator which supports any shape rank. 
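// --- Editorial usage sketch (hypothetical node, assumed layout/precision values):
// with this refactoring a node implementation is expected to declare its ports
// through the PortConfigurator-based overload roughly like
//   addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32},
//                         {LayoutType::ncsp, Precision::I32}},
//                        {{LayoutType::ncsp, Precision::FP32}},
//                        impl_desc_type::ref_any);
// Port shapes are optional: a default-constructed (rank 0) Shape makes the
// loops below fall back to the corresponding parent/child edge shape.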
+ if (shape.getRank() < portConfigurator.blockedDescCreator->getMinimalRank()) return false; - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = dataConfigurator.inplace; - dataConfig.constant = dataConfigurator.constant; + PortConfig portConfig; + portConfig.inPlace = portConfigurator.inPlace; + portConfig.constant = portConfigurator.constant; + portConfig.desc = portConfigurator.blockedDescCreator->createUniqueDesc(prc, shape.getStaticDims()); - dataConfig.desc = dataConfigurator.tensorDescCreator->createDesc(prc, dims); - - port.push_back(dataConfig); + port.push_back(std::move(portConfig)); return true; }; - InferenceEngine::LayerConfig config; - for (size_t i = 0; i < inDataConfigurators.size(); i++) { - auto dims = inDataConfigurators[i].shape.empty() ? getParentEdgesAtPort(i)[0]->getDims().ToSizeVector() : inDataConfigurators[i].shape; - auto prc = inDataConfigurators[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalInputPrecisionAtPort(i) - : inDataConfigurators[i].prc; - if (!fill_port(inDataConfigurators[i], dims, prc, config.inConfs)) + NodeConfig config; + for (size_t i = 0; i < inPortConfigs.size(); i++) { + auto shape = inPortConfigs[i].shape.getRank() == 0 ? getParentEdgesAtPort(i)[0]->getShape() : inPortConfigs[i].shape; + auto prc = inPortConfigs[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalInputPrecisionAtPort(i) : inPortConfigs[i].prc; + if (!fill_port(inPortConfigs[i], shape, prc, config.inConfs)) return; } - for (size_t i = 0; i < outDataConfigurators.size(); i++) { - auto dims = outDataConfigurators[i].shape.empty() ? getChildEdgesAtPort(i)[0]->getDims().ToSizeVector() : outDataConfigurators[i].shape; - auto prc = outDataConfigurators[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalOutputPrecisionAtPort(i) - : outDataConfigurators[i].prc; - if (!fill_port(outDataConfigurators[i], dims, prc, config.outConfs)) + for (size_t i = 0; i < outPortConfigs.size(); i++) { + auto dims = outPortConfigs[i].shape.getRank() == 0 ? getChildEdgesAtPort(i)[0]->getShape() : outPortConfigs[i].shape; + auto prc = outPortConfigs[i].prc == InferenceEngine::Precision::UNSPECIFIED ? 
getOriginalOutputPrecisionAtPort(i) : outPortConfigs[i].prc; + if (!fill_port(outPortConfigs[i], dims, prc, config.outConfs)) return; } @@ -811,7 +819,7 @@ private: return PD(*selected_desc_ptr, engine); } - void prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::primitive_desc_iterator& itpd); + void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd); enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 }; ConstantType checkConstant(LOOK look, std::vector& checkNodes); }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 59a29ebf40a..c7907aa5569 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -57,7 +57,10 @@ #include #include #include +#include +#include #include +#include #include #include #include @@ -167,6 +170,9 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); manager.register_pass(); if (useLpt) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/base.hpp b/inference-engine/src/mkldnn_plugin/nodes/base.hpp deleted file mode 100644 index b611c8eb0a4..00000000000 --- a/inference-engine/src/mkldnn_plugin/nodes/base.hpp +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include "nodes/list.hpp" -#include "common/tensor_desc_creator.h" -#include "ngraph/descriptor/tensor.hpp" -#include -#include "cpu_types.h" - -#include -#include - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class ExtLayerBase: public ILayerExecImpl { -public: - StatusCode getSupportedConfigurations(std::vector& conf, ResponseDesc *resp) noexcept override { - if (!errorMsg.empty()) { - if (resp) { - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - conf = confs; - return OK; - } - - StatusCode init(LayerConfig& config, ResponseDesc *resp) noexcept override { - for (auto& input : config.inConfs) { - for (auto& offset : input.desc.getBlockingDesc().getOffsetPaddingToData()) { - if (offset) { - return GENERAL_ERROR; - } - } - if (input.desc.getBlockingDesc().getOffsetPadding()) { - return GENERAL_ERROR; - } - } - for (auto& output : config.outConfs) { - for (auto& offset : output.desc.getBlockingDesc().getOffsetPaddingToData()) { - if (offset) { - return GENERAL_ERROR; - } - } - if (output.desc.getBlockingDesc().getOffsetPadding()) { - return GENERAL_ERROR; - } - } - return OK; - } - -protected: - MKLDNNPlugin::Algorithm getAlgorithm() const { - return algorithm; - } - MKLDNNPlugin::Algorithm algorithm; - - class DataConfigurator { - public: - DataConfigurator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType, Precision prc = Precision::UNSPECIFIED, bool constant = false, int inplace = -1) : - tensorDescCreator(getTensorDescCreator(tensorDescType)), prc(prc), constant(constant), inplace(inplace) {} - - DataConfigurator(const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr& tensorDescCreator, Precision prc = Precision::UNSPECIFIED, - bool constant = false, int inplace = -1) : tensorDescCreator(tensorDescCreator), prc(prc), constant(constant), inplace(inplace) {} - - const MKLDNNPlugin::TensorDescCreator::CreatorConstPtr tensorDescCreator; - const bool constant = false; - const int inplace = -1; - 
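Where a node needs a concrete blocked descriptor rather than the addSupportedPrimDesc shorthand, the creator registry that replaces this removed helper is used directly; a rough sketch of that pattern, mirroring the mkldnn_bin_conv_node.cpp call sites further down (the function name and shape are made up, and the usual `using namespace MKLDNNPlugin; using namespace InferenceEngine;` is assumed):

    #include "common/blocked_desc_creator.h"

    void exampleBlockedDescSetup() {
        // LayoutType replaces TensorDescCreatorTypes as the registry key.
        const auto& creators = BlockedDescCreator::getCommonCreators();
        const auto& creator = creators.at(LayoutType::nCsp16c);
        const SizeVector dims = {1, 32, 7, 7};   // illustrative static shape
        // createUniqueDesc wraps createDesc; the result can be moved into PortConfig::desc.
        auto desc = creator->createUniqueDesc(Precision::FP32, dims);
        (void)desc;
    }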
const Precision prc = Precision::UNSPECIFIED; // By default ngraph node precision is used - private: - static MKLDNNPlugin::TensorDescCreator::CreatorConstPtr getTensorDescCreator(MKLDNNPlugin::TensorDescCreatorTypes tensorDescType) { - auto& creators = MKLDNNPlugin::TensorDescCreator::getCommonCreators(); - if (creators.find(tensorDescType) == creators.end()) { - IE_THROW() << "Cannot find tensor descriptor creator"; - } - return creators.at(tensorDescType); - } - }; - - void addConfig(const std::shared_ptr& op, - const std::vector& inDataConfigurators, - const std::vector& outDataConfigurators, - bool dynBatchSupport = false) { - LayerConfig config; - - if (inDataConfigurators.size() != op->get_input_size()) - IE_THROW() << "Cannot add config for operation " << op->get_friendly_name() << ". Incorrect number of inputs: " << - "expected: " << op->get_input_size() << ", provided: " << inDataConfigurators.size(); - if (outDataConfigurators.size() != op->get_output_size()) - IE_THROW() << "Cannot add config for operation " << op->get_friendly_name() << ". Incorrect number of outputs: " << - "expected: " << op->get_output_size() << ", provided: " << outDataConfigurators.size(); - - auto fill_port = [] (const DataConfigurator& dataConfigurator, const ngraph::descriptor::Tensor& tensor, std::vector& port) -> bool { - // In order to simplify particular node initialization logic we just don't add config in case target shape is not supported by tensorDescCreator. - // This should be suitable for major of scenarios since almost all nodes add `ncsp` tensorDescCreator which supports any shape rank. - if (tensor.get_shape().size() < dataConfigurator.tensorDescCreator->getMinimalRank()) - return false; - - auto precision = dataConfigurator.prc != Precision::UNSPECIFIED ? dataConfigurator.prc : details::convertPrecision(tensor.get_element_type()); - - DataConfig dataConfig; - dataConfig.inPlace = dataConfigurator.inplace; - dataConfig.constant = dataConfigurator.constant; - dataConfig.desc = dataConfigurator.tensorDescCreator->createDesc(precision, tensor.get_shape()); - - port.push_back(dataConfig); - - return true; - }; - - for (size_t i = 0; i < inDataConfigurators.size(); i++) - if (!fill_port(inDataConfigurators[i], op->get_input_tensor(i), config.inConfs)) - return; - - for (size_t i = 0; i < outDataConfigurators.size(); i++) - if (!fill_port(outDataConfigurators[i], op->get_output_tensor(i), config.outConfs)) - return; - - config.dynBatchSupport = dynBatchSupport; - confs.push_back(config); - } - - std::string errorMsg; - std::vector confs; -}; - -template -class ImplFactory : public ILayerImplFactory { -public: - explicit ImplFactory(const std::shared_ptr& op) : ngraphOp(op) {} - - // First implementation has more priority than next - StatusCode getImplementations(std::vector& impls, ResponseDesc *resp) noexcept override { - try { - impls.push_back(ILayerImpl::Ptr(new IMPL(ngraphOp))); - } catch (const InferenceEngine::Exception& ex) { - strncpy(resp->msg, ex.what(), sizeof(resp->msg) - 1); - IE_SUPPRESS_DEPRECATED_START - return ex.getStatus() != OK ? 
ex.getStatus() : GENERAL_ERROR; - IE_SUPPRESS_DEPRECATED_END - } - return OK; - } -protected: - const std::shared_ptr ngraphOp; -}; - -#define REG_FACTORY_FOR(__prim, __type) \ - void __prim ## __type(MKLDNNExtensions * extInstance) { \ - using namespace MKLDNNPlugin; \ - extInstance->layersFactory.registerNodeIfRequired(MKLDNNPlugin, __type, OV_PP_TOSTRING(__type), ImplFactory<__prim>); \ - } - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.cpp b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp similarity index 60% rename from inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.cpp rename to inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp index 18d48383162..85566b3833a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "tensor_desc_creator.h" +#include "blocked_desc_creator.h" #include using namespace InferenceEngine; @@ -11,19 +11,19 @@ using namespace MKLDNNPlugin; namespace { constexpr size_t channelsPos = 1lu; -class PlainFormatCreator : public TensorDescCreator { +class PlainFormatCreator : public BlockedDescCreator { public: - InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const override { + BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const override { SizeVector order(srcDims.size()); std::iota(order.begin(), order.end(), 0); - return TensorDesc(precision, srcDims, {srcDims, order}); + return BlockedMemoryDesc(precision, srcDims, srcDims, order); } size_t getMinimalRank() const override { return 0lu; } }; -class PerChannelCreator : public TensorDescCreator { +class PerChannelCreator : public BlockedDescCreator { public: - InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision &precision, const InferenceEngine::SizeVector &srcDims) const override { + BlockedMemoryDesc createDesc(const InferenceEngine::Precision &precision, const InferenceEngine::SizeVector &srcDims) const override { SizeVector order(srcDims.size()); std::iota(order.begin(), order.end(), 0); SizeVector blkDims = srcDims; @@ -37,15 +37,15 @@ public: moveElementBack(blkDims, channelsPos); } - return TensorDesc(precision, srcDims, {blkDims, order}); + return BlockedMemoryDesc(precision, srcDims, blkDims, order); } size_t getMinimalRank() const override { return 3lu; } }; -class ChannelBlockedCreator : public TensorDescCreator { +class ChannelBlockedCreator : public BlockedDescCreator { public: ChannelBlockedCreator(size_t blockSize) : _blockSize(blockSize) {} - InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const override { + BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const override { if (srcDims.size() < 2) { IE_THROW() << "Can't create blocked tensor descriptor!"; } @@ -55,10 +55,12 @@ public: order.push_back(channelsPos); SizeVector blkDims = srcDims; - blkDims[channelsPos] = blkDims[channelsPos] / _blockSize + (blkDims[channelsPos] % _blockSize ? 
1 : 0); + if (Shape::UNDEFINED_DIM != blkDims[channelsPos]) { + blkDims[channelsPos] = blkDims[channelsPos] / _blockSize + (blkDims[channelsPos] % _blockSize ? 1 : 0); + } blkDims.push_back(_blockSize); - return TensorDesc(precision, srcDims, {blkDims, order}); + return BlockedMemoryDesc(precision, srcDims, blkDims, order); } size_t getMinimalRank() const override { return 3lu; } @@ -67,16 +69,16 @@ private: }; } // namespace -const TensorDescCreator::CreatorsMap& TensorDescCreator::getCommonCreators() { - static const CreatorsMap map{ { TensorDescCreatorTypes::nspc, CreatorConstPtr(new PerChannelCreator) }, - { TensorDescCreatorTypes::nCsp8c, CreatorConstPtr(new ChannelBlockedCreator(8)) }, - { TensorDescCreatorTypes::nCsp16c, CreatorConstPtr(new ChannelBlockedCreator(16)) }, - { TensorDescCreatorTypes::ncsp, CreatorConstPtr(new PlainFormatCreator) } }; +const BlockedDescCreator::CreatorsMap& BlockedDescCreator::getCommonCreators() { + static const CreatorsMap map{ { LayoutType::nspc, CreatorConstPtr(new PerChannelCreator) }, + { LayoutType::nCsp8c, CreatorConstPtr(new ChannelBlockedCreator(8)) }, + { LayoutType::nCsp16c, CreatorConstPtr(new ChannelBlockedCreator(16)) }, + { LayoutType::ncsp, CreatorConstPtr(new PlainFormatCreator) } }; return map; } std::pair -TensorDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank) { +BlockedDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank) { auto rankFilter = [rank](const CreatorsMap::value_type& item) { if (item.second->getMinimalRank() > rank) { return false; @@ -90,7 +92,7 @@ TensorDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank) } std::pair -TensorDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes) { +BlockedDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes) { unsigned bitMask = 0ul; for (auto& item : supportedTypes) { bitMask |= 1 << static_cast(item); @@ -112,7 +114,7 @@ TensorDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, cons } std::pair -TensorDescCreator::makeFilteredRange(const CreatorsMap &map, TensorDescCreator::Predicate predicate) { +BlockedDescCreator::makeFilteredRange(const CreatorsMap &map, BlockedDescCreator::Predicate predicate) { auto first = CreatorsMapFilterConstIterator(std::move(predicate), map.begin(), map.end()); auto last = first.end(); return std::make_pair(first, last); diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.h b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h similarity index 74% rename from inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.h rename to inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h index 4fda57fcb2f..f53524288e4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/tensor_desc_creator.h +++ b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h @@ -4,25 +4,19 @@ #pragma once -#include #include +#include "cpu_shape.h" +#include "cpu_blocked_memory_desc.h" namespace MKLDNNPlugin { -enum class TensorDescCreatorTypes : unsigned { - nspc, // general per channels format - ncsp, // general planar - nCsp8c, // general channels blocked by 8 - nCsp16c // general channels blocked by 16 -}; - class CreatorsMapFilterConstIterator; -class TensorDescCreator { +class BlockedDescCreator { public: - typedef std::shared_ptr CreatorPtr; - typedef std::shared_ptr CreatorConstPtr; - typedef std::map 
CreatorsMap; + typedef std::shared_ptr CreatorPtr; + typedef std::shared_ptr CreatorConstPtr; + typedef std::map CreatorsMap; typedef std::function Predicate; public: @@ -30,17 +24,20 @@ public: static std::pair makeFilteredRange(const CreatorsMap &map, unsigned rank); static std::pair - makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes); + makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes); static std::pair makeFilteredRange(const CreatorsMap& map, Predicate predicate); - virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const = 0; + virtual BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const = 0; + std::unique_ptr createUniqueDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const { + return MKLDNNPlugin::make_unique(createDesc(precision, srcDims)); + } virtual size_t getMinimalRank() const = 0; - virtual ~TensorDescCreator() = default; + virtual ~BlockedDescCreator() = default; }; class CreatorsMapFilterConstIterator { public: - typedef TensorDescCreator::CreatorsMap::const_iterator Iterator; + typedef BlockedDescCreator::CreatorsMap::const_iterator Iterator; typedef std::iterator_traits::value_type value_type; typedef std::iterator_traits::reference reference; typedef std::iterator_traits::pointer pointer; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp new file mode 100644 index 00000000000..4bf60d6eb21 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp @@ -0,0 +1,264 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_adaptive_pooling.h" +#include "ie_parallel.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; +using namespace mkldnn; +using namespace mkldnn::impl::cpu::x64; + +bool MKLDNNAdaptivePoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveAvgPool::type_info)) { + auto adaPool = std::dynamic_pointer_cast(op); + if (!adaPool) { + errorMessage = "Only opset8 AdaptiveAvgPooling operation is supported"; + return false; + } + } else if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveMaxPool::type_info)) { + auto adaPool = std::dynamic_pointer_cast(op); + if (!adaPool) { + errorMessage = "Only opset8 AdaptiveMaxPooling operation is supported"; + return false; + } + } else { + errorMessage = "Unsupported Adaptive pooling mode"; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + +MKLDNNAdaptivePoolingNode::MKLDNNAdaptivePoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, + MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (isSupportedOperation(op, errorMessage)) { + errorPrefix = "Adaptive Pooling layer with name '" + getName() + "' "; + } else { + IE_THROW(NotImplemented) << errorMessage; + } + if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveAvgPool::type_info)) { + algorithm = Algorithm::AdaptivePoolingAvg; + } else if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveMaxPool::type_info)) { + algorithm = Algorithm::AdaptivePoolingMax; + } +} + +void MKLDNNAdaptivePoolingNode::getSupportedDescriptors() { + if (!descs.empty()) + return; + + if (getParentEdges().size() != 2) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getParentEdges().size(); + if (getChildEdges().size() != (algorithm == AdaptivePoolingMax ? 2 : 1)) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getParentEdges().size(); + + auto parentDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto childDims = getChildEdgeAt(0)->getShape().getStaticDims(); + + spatialDimsCount = parentDims.size() - 2; + if (!one_of(spatialDimsCount, 1, 2, 3)) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); + } + + if (getParentEdgeAt(1)->getShape().getRank() != 1) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); + } + + if (getChildEdgeAt(0)->getShape().getRank() != getParentEdgeAt(0)->getShape().getRank()) { + IE_THROW() << errorPrefix << "must keep data rank"; + } +} + +void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + // we supports only fp32 currently + precision = Precision::FP32; + + InferenceEngine::LayerConfig config; + config.dynBatchSupport = false; + config.inConfs.resize(2); + config.outConfs.resize((algorithm == Algorithm::AdaptivePoolingAvg ? 
1 : 2)); + + std::vector dataFormats{ LayoutType::ncsp }; + if (getParentEdgeAt(0)->getShape().getStaticDims()[1] != 1) { + dataFormats.push_back(LayoutType::nspc); + dataFormats.push_back(LayoutType::nCsp16c); + dataFormats.push_back(LayoutType::nCsp8c); + } + for (const auto &df : dataFormats) { + if (algorithm == Algorithm::AdaptivePoolingAvg) { + addSupportedPrimDesc({{df, precision}, {LayoutType::ncsp, Precision::I32}}, + {{df, precision}}, + impl_desc_type::unknown); + } else { + addSupportedPrimDesc({{df, precision}, {LayoutType::ncsp, Precision::I32}}, + {{df, precision}, {LayoutType::ncsp, Precision::I32}}, + impl_desc_type::unknown); + } + } +} + +void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) { + auto inputPrec = getParentEdgeAt(0)->getMemory().GetDescriptor().data.data_type; + auto outputPrec = getChildEdgeAt(0)->getMemory().GetDescriptor().data.data_type; + if (!(inputPrec == mkldnn_f32 && outputPrec == mkldnn_f32)) + IE_THROW() << errorPrefix << "doesn't support demanded precisions"; + + auto &srcMemory0 = getParentEdgeAt(0)->getMemory(); + auto &srcMemory1 = getParentEdgeAt(1)->getMemory(); + int *indexDst = nullptr; + + if (algorithm == Algorithm::AdaptivePoolingMax) { + indexDst = reinterpret_cast(getChildEdgeAt(1)->getMemoryPtr()->GetPtr()); + } + + auto srcBlockDesc = srcMemory0.GetDescriptor().data.format_desc.blocking; + + int blockSize = srcBlockDesc.inner_nblks > 0 ? srcBlockDesc.inner_blks[0] : 1; + auto isPlainFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::ncsp); + auto isTailCFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::nspc); + + const auto *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcPooledSpatialShapes = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + + if (srcMemory1.GetElementsCount() != spatialDimsCount) + IE_THROW() << errorPrefix << "has input spatial dimension (" << srcMemory1.GetElementsCount() + << ") inconsistent with pooling vector size (" << spatialDimsCount << ")"; + + auto inputDimVector = srcMemory0.GetDims(); + const int N = static_cast(inputDimVector[0]); + const int C = static_cast(inputDimVector[1]); + const int ID = static_cast(spatialDimsCount == 3 ? inputDimVector[2] : 1); + const int IH = static_cast(spatialDimsCount >= 2 ? inputDimVector[spatialDimsCount] : 1); + const int IW = static_cast(inputDimVector[spatialDimsCount + 1]); + + const int OD = static_cast(spatialDimsCount == 3 ? srcPooledSpatialShapes[0] : 1); + const int OH = static_cast(spatialDimsCount >= 2 ? srcPooledSpatialShapes[spatialDimsCount - 2] : 1); + const int OW = static_cast(srcPooledSpatialShapes[spatialDimsCount - 1]); + + const int iHW = IH * IW; + const int oDHW = OD * OH * OW, oHW = OH * OW; + + const int chPadding = srcMemory0.GetDescriptor().data.padded_dims[1]; + const int blockCount = (isTailCFmt ? 1 : chPadding / blockSize); + auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); + if (!selectedPrimitiveDescriptor) + IE_THROW() << errorPrefix << "doesn't have primitive descriptors."; + auto config = selectedPrimitiveDescriptor->getConfig(); + auto srcStrides = getParentEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); + auto dstStrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); + + // unified strides array + const size_t tailDimsOffset = (isTailCFmt ? -1 : 0); + const size_t inStrides[5] = { + srcStrides[0], + (isTailCFmt ? 
1 : srcStrides[1]), + (spatialDimsCount == 3 ? srcStrides[2 + tailDimsOffset] : 0), + (spatialDimsCount >= 2 ? srcStrides[spatialDimsCount + tailDimsOffset] : 0), + srcStrides[spatialDimsCount + 1 + tailDimsOffset] }; + const size_t outStrides[5] = { + dstStrides[0], + (isTailCFmt ? 1 : dstStrides[1]), + (spatialDimsCount == 3 ? dstStrides[2 + tailDimsOffset] : 0), + (spatialDimsCount >= 2 ? dstStrides[spatialDimsCount + tailDimsOffset] : 0), + dstStrides[spatialDimsCount + 1 + tailDimsOffset] }; + + std::function pool; + auto poolMax = [&] (const float *srcData, float *dstData, int od, int oh, int ow, size_t spatIndOff) { + size_t dStart, dEnd, hStart, hEnd, wStart, wEnd; + setBinBorders(&dStart, &dEnd, od, ID, OD); + setBinBorders(&hStart, &hEnd, oh, IH, OH); + setBinBorders(&wStart, &wEnd, ow, IW, OW); + float res = srcData[dStart * inStrides[2] + hStart * inStrides[3] + wStart * inStrides[4]]; // initial max value + int resIndex = dStart * iHW + hStart * IW + wStart; // initial max index + for (size_t pixD = dStart; pixD < dEnd; pixD++) { + for (size_t pixH = hStart; pixH < hEnd; pixH++) { + for (size_t pixW = wStart; pixW < wEnd; pixW++) { + float curr = srcData[pixD * inStrides[2] + pixH * inStrides[3] + pixW * inStrides[4]]; + resIndex = (res < curr ? pixD * iHW + pixH * IW + pixW : resIndex); + res = std::max(res, curr); + } + } + } + *dstData = res; + indexDst[spatIndOff * oDHW + od * oHW + oh * OW + ow] = resIndex; + }; + auto poolAvg = [&] (const float *srcData, float *dstData, int od, int oh, int ow, size_t spatIndOff) { + size_t dStart, dEnd, hStart, hEnd, wStart, wEnd; + setBinBorders(&dStart, &dEnd, od, ID, OD); + setBinBorders(&hStart, &hEnd, oh, IH, OH); + setBinBorders(&wStart, &wEnd, ow, IW, OW); + auto binSize = (dEnd - dStart) * (hEnd - hStart) * (wEnd - wStart); + if (binSize == 0) + IE_THROW() << errorPrefix << "has empty bin"; + float sum = 0; + for (size_t pixD = dStart; pixD < dEnd; pixD++) { + for (size_t pixH = hStart; pixH < hEnd; pixH++) { + for (size_t pixW = wStart; pixW < wEnd; pixW++) { + float curr = srcData[pixD * inStrides[2] + pixH * inStrides[3] + pixW * inStrides[4]]; + sum = sum + curr; + } + } + } + *dstData = sum / binSize; + }; + + if (algorithm == Algorithm::AdaptivePoolingMax) { + pool = poolMax; + } else { + pool = poolAvg; + } + + parallel_for5d(N, blockCount, OD, OH, OW, + [&](int n, int blkIdx, int od, int oh, int ow) { + auto srcData = src + n * inStrides[0] + blkIdx * inStrides[1]; + auto dstData = dst + n * outStrides[0] + blkIdx * outStrides[1] + + od * outStrides[2] + oh * outStrides[3] + ow * outStrides[4]; + int cStart = 0, cEnd = C, inResidual = 0, outResidual = 0; + if (!isTailCFmt) { + cStart = blkIdx * blockSize; + cEnd = (blkIdx == blockCount - 1 ? 
C : cStart + blockSize); + } + for (int c = cStart; c < cEnd; c++) { + if (isTailCFmt) { + inResidual = c * inStrides[1]; + outResidual = c * outStrides[1]; + } else if (!isPlainFmt) { + inResidual = outResidual = c % blockSize; + } + pool(srcData + inResidual, dstData + outResidual, od, oh, ow, n * C + c); + }}); +} + +bool MKLDNNAdaptivePoolingNode::created() const { + return getType() == AdaptivePooling; +} + +void MKLDNNAdaptivePoolingNode::createPrimitive() {} + +inline void MKLDNNAdaptivePoolingNode::setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength) { + *(startPtr) = idx * inputLength / outputLength; + *(endPtr) = ceil(static_cast((idx + 1) * inputLength) / outputLength); +} + +REG_MKLDNN_PRIM_FOR(MKLDNNAdaptivePoolingNode, AdaptivePooling) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.h new file mode 100644 index 00000000000..386e57f4dcf --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.h @@ -0,0 +1,35 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNAdaptivePoolingNode : public MKLDNNNode { +public: + MKLDNNAdaptivePoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + + void getSupportedDescriptors() override; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + int spatialDimsCount; + InferenceEngine::Precision precision = InferenceEngine::Precision::FP32; + inline void setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength); + + std::string errorPrefix; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp index e2616f43c99..8700a70c5b6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp @@ -10,7 +10,7 @@ #include "utils/bfloat16.hpp" #include #include "mkldnn_batch_to_space_node.h" -#include +#include #include using namespace MKLDNNPlugin; @@ -67,32 +67,32 @@ void MKLDNNBatchToSpaceNode::initSupportedPrimitiveDescriptors() { if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) IE_THROW() << errorPrefix << " has unsupported precision: " << precision.name(); - addSupportedPrimDesc({{TensorDescCreatorTypes::nspc, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nspc, precision}}, + addSupportedPrimDesc({{LayoutType::nspc, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nspc, precision}}, impl_desc_type::ref_any); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::ncsp, precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, precision}, + {LayoutType::ncsp}, + 
{LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); if (inDims[1] % 8 == 0) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp8c, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nCsp8c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp8c, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nCsp8c, precision}}, impl_desc_type::ref_any); } if (inDims[1] % 16 == 0) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp16c, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nCsp16c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp16c, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nCsp16c, precision}}, impl_desc_type::ref_any); } } @@ -112,15 +112,16 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto layout = getParentEdgeAt(0)->getDesc().getLayout(); - const bool blocked = layout != NCHW && layout != NCDHW && layout != NHWC && layout != NDHWC; + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + + const bool blocked = srcDesc.hasLayoutType(LayoutType::nCsp8c) || srcDesc.hasLayoutType(LayoutType::nCsp16c); const auto dimsSize = inDims.size(); auto inShape5D = getShape5D(inDims); auto outShape5D = getShape5D(outDims); auto blockShape = getShape5D(blockShapeIn); - if (layout == NHWC || layout == NDHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5)) { inShape5D.push_back(inShape5D[1]); inShape5D.erase(inShape5D.begin() + 1); outShape5D.push_back(outShape5D[1]); @@ -129,9 +130,11 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { blockShape.erase(blockShape.begin() + 1); } - const size_t blockSize = blocked ? getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims().back() : 1lu; - const size_t blockCountInput = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; - const size_t blockCountOutput = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + + const size_t blockSize = blocked ? dstDesc.getBlockDims().back() : 1lu; + const size_t blockCountInput = srcDesc.getBlockDims()[1]; + const size_t blockCountOutput = dstDesc.getBlockDims()[1]; const auto blockRemainder = inShape5D[1] % blockSize; const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; @@ -166,7 +169,7 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - cropsBeginIn[2] : 0lu; bIdx = dimsSize == 5 ? 
bIdx / blockShapeIn[2] : bIdx; oAdd[1] = bIdx % blockShapeIn[1] - cropsBeginIn[1]; - if (layout == NHWC || layout == NDHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5)) { oAdd.push_back(oAdd[1]); oAdd.erase(oAdd.begin() + 1); } @@ -221,12 +224,13 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { } void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(0)->getDesc().getPrecision().size()) { + switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size()) { case 1: batchToSpaceKernel::value_type>(); break; case 2: batchToSpaceKernel::value_type>(); break; case 4: batchToSpaceKernel::value_type>(); break; default: - IE_THROW() << "BatchToSpace layer does not support precision '" + std::string(getParentEdgeAt(0)->getDesc().getPrecision().name()) + "'"; + IE_THROW() << "BatchToSpace layer does not support precision '" << + std::string(getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name()) << "'"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h index cab89df7dc6..353ea634511 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h @@ -24,6 +24,10 @@ public: static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; +private: + template + void batchToSpaceKernel(); + private: InferenceEngine::SizeVector inDims; InferenceEngine::SizeVector outDims; @@ -31,9 +35,6 @@ private: std::vector cropsBeginIn; std::string errorPrefix; - - template - void batchToSpaceKernel(); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp index 517066d6f32..183bc158ff2 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp @@ -942,16 +942,16 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges"; - if (getParentEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); + if (getParentEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); } - if (getParentEdgeAt(1)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); + if (getParentEdgeAt(1)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); } - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); + if (getChildEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); } } @@ -961,7 +961,7 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() { setPostOps(attr); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(2); config.inConfs[0].constant = false; @@ 
-975,26 +975,38 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() { if (implType != impl_desc_type::ref) { // optimzed implementation - auto outputDataType = withBinarization ? memory::data_type::bin : memory::data_type::f32; - auto weiFormat = implType == impl_desc_type::jit_avx512 ? memory::format_tag::OIhw16o32i : memory::format_tag::OIhw8o32i; // auto weiFormat = implType == impl_desc_type::jit_avx512 ? memory::format_tag::OhIw16o32i : memory::format_tag::OhIw8o32i; - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), memory::data_type::bin, memory::format_tag::nhwc); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::bin, weiFormat); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nhwc); + //activation + auto nspcCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::nspc); + config.inConfs[0].desc = nspcCreator->createUniqueDesc(Precision::BIN, getParentEdgeAt(0)->getShape().getStaticDims()); + + //weights + size_t weiFirstDimBlockSize = implType == impl_desc_type::jit_avx512 ? 16 : 8; //memory::format_tag::OIhw16o32i : memory::format_tag::OIhw8o32i; + auto weiDims = getParentEdgeAt(1)->getShape().getStaticDims(); + std::vector weiBlockDims = {div_up(weiDims[0], weiFirstDimBlockSize), div_up(weiDims[1], 32), + weiDims[2], weiDims[3], weiFirstDimBlockSize, 32}; + std::vector weiOrder = {0, 1, 2, 3, 0, 1}; + + config.inConfs[1].desc = MKLDNNPlugin::make_unique(Precision::BIN, weiDims, weiBlockDims, weiOrder); + + //result + auto outputPrecision = withBinarization ? Precision::BIN : Precision::FP32; + config.outConfs[0].desc = nspcCreator->createUniqueDesc(outputPrecision, getChildEdgeAt(0)->getShape().getStaticDims()); if (withSum) { config.inConfs.push_back(config.outConfs[0]); config.outConfs[0].inPlace = 2; } - supportedPrimitiveDescriptors.push_back({config, implType, memory::format_tag::nhwc}); + supportedPrimitiveDescriptors.push_back({config, implType}); } else { // reference implementation - auto weiFormat = group > 1 ? 
memory::format_tag::goihw : memory::format_tag::oihw; + auto weiCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); + auto nspcCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::nspc); - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), memory::data_type::bin, memory::format_tag::nhwc); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::bin, weiFormat); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), memory::data_type::f32, memory::format_tag::nhwc); - supportedPrimitiveDescriptors.push_back({config, implType, memory::format_tag::nhwc}); + config.inConfs[0].desc = nspcCreator->createUniqueDesc(Precision::BIN, getParentEdgeAt(0)->getShape().getStaticDims()); + config.inConfs[1].desc = weiCreator->createUniqueDesc(Precision::BIN, getParentEdgeAt(1)->getShape().getStaticDims()); + config.outConfs[0].desc = nspcCreator->createUniqueDesc(Precision::FP32, getChildEdgeAt(0)->getShape().getStaticDims()); + supportedPrimitiveDescriptors.push_back({config, implType}); } } @@ -1003,11 +1015,9 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() { if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors."; - auto config = selectedPrimitiveDescriptor->getConfig(); - - auto srcDims = config.inConfs[0].desc.getDims(); - auto weiDims = config.inConfs[1].desc.getDims(); - auto dstDims = config.outConfs[0].desc.getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto weiDims = getParentEdgeAt(1)->getShape().getStaticDims(); + auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); auto implType = selectedPrimitiveDescriptor->getImplementationType(); @@ -1061,9 +1071,12 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() { jcp.nb_oc_blocking = nstl::min(implType == impl_desc_type::jit_sse42 ? 2 : implType == impl_desc_type::jit_avx2 ? 4 : 6, jcp.nb_oc); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(config.outConfs[0].desc.getPrecision()); - jcp.typesize_in = config.inConfs[0].desc.getPrecision() == Precision::BIN ? 1 : config.inConfs[0].desc.getPrecision().size(); - jcp.typesize_out = config.outConfs[0].desc.getPrecision() == Precision::BIN ? 1 : config.outConfs[0].desc.getPrecision().size(); + auto srcPrecision = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(); + auto dstPrecision = getChildEdgeAt(0)->getMemory().GetDesc().getPrecision(); + + jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(dstPrecision); + jcp.typesize_in = srcPrecision == Precision::BIN ? 1 : srcPrecision.size(); + jcp.typesize_out = dstPrecision == Precision::BIN ? 
1 : dstPrecision.size(); int r_pad_no_tail = nstl::max(0, (jcp.ow - jcp.ur_w_tail - 1) * jcp.stride_w + (jcp.kw - 1) * (jcp.dilate_w + 1) - (jcp.iw + jcp.l_pad - 1)); @@ -1093,7 +1106,11 @@ bool MKLDNNBinaryConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { return false; if (node->getType() == FakeQuantize) { - return node->getAlgorithm() == FQBinarization; + bool ret = node->getAlgorithm() == FQBinarization; + for (size_t i = 1; i < node->getParentEdges().size(); i++) { + ret &= node->getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() == 1; + } + return ret; } else { return canFuseSimpleOperation(node); } @@ -1277,30 +1294,28 @@ void MKLDNNBinaryConvolutionNode::execute(mkldnn::stream strm) { auto weights = reinterpret_cast(weightsMemory->GetPtr()); auto dst = reinterpret_cast(dstMemory->GetPtr()); + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + std::vector srcStride(srcDesc.getStrides().size()); + for (int i = 0; i < srcStride.size(); i++) { + srcStride[srcDesc.getOrder()[i]] = srcDesc.getStrides()[i]; + } + + auto weiDesc = getParentEdgeAt(1)->getMemory().GetDescWithType(); + std::vector weightsStride(weiDesc.getShape().getRank()); + for (int i = 0; i < weightsStride.size(); i++) { + weightsStride[weiDesc.getOrder()[i]] = weiDesc.getStrides()[i]; + } + + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + std::vector dstStride(dstDesc.getStrides().size()); + for (int i = 0; i < dstStride.size(); i++) { + dstStride[dstDesc.getOrder()[i]] = dstDesc.getStrides()[i]; + } + auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors."; - auto config = selectedPrimitiveDescriptor->getConfig(); - - auto srcBlockDesc = config.inConfs[0].desc.getBlockingDesc(); - std::vector srcStride(srcBlockDesc.getStrides().size()); - for (int i = 0; i < srcStride.size(); i++) { - srcStride[srcBlockDesc.getOrder()[i]] = srcBlockDesc.getStrides()[i]; - } - - auto weiBlockDesc = config.inConfs[1].desc.getBlockingDesc(); - std::vector weightsStride(config.inConfs[1].desc.getDims().size()); - for (int i = 0; i < weightsStride.size(); i++) { - weightsStride[weiBlockDesc.getOrder()[i]] = weiBlockDesc.getStrides()[i]; - } - - auto dstBlockDesc = config.outConfs[0].desc.getBlockingDesc(); - std::vector dstStride(dstBlockDesc.getStrides().size()); - for (int i = 0; i < dstStride.size(); i++) { - dstStride[dstBlockDesc.getOrder()[i]] = dstBlockDesc.getStrides()[i]; - } - auto implType = selectedPrimitiveDescriptor->getImplementationType(); if (implType != impl_desc_type::ref) { executeOptimized(src, weights, dst, srcStride, weightsStride, dstStride); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp index 3d9815d48c1..ef9c14ad0d4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp @@ -10,7 +10,7 @@ #include "utils/bfloat16.hpp" #include #include "mkldnn_broadcast_node.h" -#include +#include #include #include "common/cpu_memcpy.h" @@ -60,18 +60,20 @@ void MKLDNNBroadcastNode::initSupportedPrimitiveDescriptors() { Precision prec = getOriginalInputPrecisionAtPort(BROADCAST_INPUT); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, prec}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, - 
{{TensorDescCreatorTypes::ncsp, prec}}, + addSupportedPrimDesc({{LayoutType::ncsp, prec}, + {LayoutType::ncsp, Precision::I32}}, + {{LayoutType::ncsp, prec}}, impl_desc_type::ref_any); } void MKLDNNBroadcastNode::execute(mkldnn::stream strm) { - size_t shape_size = (getParentEdgeAt(BROADCAST_SHAPE)->getDesc().getDims())[0]; - SizeVector dst_dims = getChildEdgeAt(0)->getDesc().getDims(); - SizeVector src_dims = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getDims(); - SizeVector srcStrides = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getBlockingDesc().getStrides(); - size_t data_size = getParentEdgeAt(BROADCAST_INPUT)->getDesc().getPrecision().size(); + size_t shape_size = (getParentEdgeAt(BROADCAST_SHAPE)->getMemory().GetDesc().getShape().getStaticDims())[0]; + SizeVector dst_dims = getChildEdgeAt(0)->getMemory().GetDesc().getShape().getStaticDims(); + SizeVector src_dims = getParentEdgeAt(BROADCAST_INPUT)->getMemory().GetDesc().getShape().getStaticDims(); + + auto srcDesc = getParentEdgeAt(BROADCAST_INPUT)->getMemory().GetDescWithType(); + SizeVector srcStrides = srcDesc.getStrides(); + size_t data_size = srcDesc.getPrecision().size(); if (!src_dims.size()) src_dims = SizeVector(1, 1); @@ -86,7 +88,8 @@ void MKLDNNBroadcastNode::execute(mkldnn::stream strm) { IE_THROW() << "Output tensor dimension is smaller then input tensor dimension"; } - InferenceEngine::SizeVector dstStrides = getChildEdgeAt(0)->getDesc().getBlockingDesc().getStrides(); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + InferenceEngine::SizeVector dstStrides = dstDesc.getStrides(); InferenceEngine::SizeVector src_aligned(dst_dims.size()); InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size()); size_t prefix_size = dst_dims.size() - src_dims.size(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp index c6c327a1993..602f4954c3b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -82,9 +80,9 @@ void MKLDNNBucketizeNode::initSupportedPrimitiveDescriptors() { output_precision = Precision::I32; } - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, input_precision}, - {TensorDescCreatorTypes::ncsp, boundaries_precision}}, - {{TensorDescCreatorTypes::ncsp, output_precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, input_precision}, + {LayoutType::ncsp, boundaries_precision}}, + {{LayoutType::ncsp, output_precision}}, impl_desc_type::ref_any); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp index 4990a658d61..2907a035788 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp @@ -21,7 +21,8 @@ #include "mkldnn_eltwise_node.h" #include #include "common/cpu_memcpy.h" -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -61,19 +62,19 @@ MKLDNNConcatNode::MKLDNNConcatNode(const std::shared_ptr& op, cons } void MKLDNNConcatNode::getSupportedDescriptors() { - auto& firstParentDims = getParentEdgeAt(0)->getDims(); + auto& firstParentDims = getParentEdgeAt(0)->getShape().getStaticDims(); for (size_t i = 1; i < 
getParentEdges().size(); i++) { - auto& dims = getParentEdgeAt(i)->getDims(); + auto& dims = getParentEdgeAt(i)->getShape().getStaticDims(); bool incorrectDims = false; - for (size_t j = 0; j < firstParentDims.ndims(); j++) { + for (size_t j = 0; j < firstParentDims.size(); j++) { if (j == axis) continue; - if (dims.ndims() != firstParentDims.ndims() || firstParentDims[j] != dims[j]) { + if (dims.size() != firstParentDims.size() || firstParentDims[j] != dims[j]) { incorrectDims = true; break; } } - if (incorrectDims || firstParentDims.ndims() == 0) { + if (incorrectDims || firstParentDims.size() == 0) { IE_THROW() << "Incorrect input dimensions for concat node " << getName(); } } @@ -100,19 +101,19 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { // Concat supports only equal precisions for inputs and output outputPrecision = inputPrecision; - auto& dstDims = getChildEdgeAt(0)->getDims(); - std::vector tdCreatorTypes = {TensorDescCreatorTypes::ncsp, TensorDescCreatorTypes::nspc}; + auto& dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + std::vector tdCreatorTypes = {LayoutType::ncsp, LayoutType::nspc}; // check if blocked layouts are available the channels size should be evenly divided by the block size to avoid slow oneDNN ref implementation - if (dstDims.ndims() > channelAxis) { - for (auto item : { std::make_pair(8lu, TensorDescCreatorTypes::nCsp8c), std::make_pair(16lu, TensorDescCreatorTypes::nCsp16c)}) { - SizeVector blkDims = dstDims.ToSizeVector(); + if (dstDims.size() > channelAxis) { + for (auto item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c)}) { + SizeVector blkDims = dstDims; if (blkDims[channelAxis] % item.first) continue; bool blocked = true; for (size_t i = 0; i < getParentEdges().size(); i++) { - auto& srcDims = getParentEdgeAt(i)->getDims(); + auto& srcDims = getParentEdgeAt(i)->getShape().getStaticDims(); if (srcDims[channelAxis] % item.first) { blocked = false; break; @@ -126,28 +127,27 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { std::vector pdIndexesToReuse; - auto& creatorsMap = TensorDescCreator::getCommonCreators(); - auto itrRange = TensorDescCreator::makeFilteredRange(creatorsMap, static_cast(dstDims.ndims()), tdCreatorTypes); + auto& creatorsMap = BlockedDescCreator::getCommonCreators(); + auto itrRange = BlockedDescCreator::makeFilteredRange(creatorsMap, static_cast(dstDims.size()), tdCreatorTypes); for (auto itr = itrRange.first; itr != itrRange.second; ++itr) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.outConfs.resize(1); config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - config.outConfs[0].desc = itr->second->createDesc(outputPrecision, dstDims.ToSizeVector()); - memory::format_tag outFmt = MKLDNNMemoryDesc(config.outConfs[0].desc).getFormat(); + config.outConfs[0].desc = itr->second->createUniqueDesc(outputPrecision, dstDims); config.inConfs.resize(getParentEdges().size()); for (size_t i = 0; i < getParentEdges().size(); ++i) { config.inConfs[i].inPlace = -1; config.inConfs[i].constant = false; - config.inConfs[i].desc = MKLDNNExtensionUtils::getUninitTensorDesc( - itr->second->createDesc(inputPrecision, getParentEdgeAt(i)->getDims().ToSizeVector())); + config.inConfs[i].desc = MemoryDescUtils::applyUndefinedOffset( + itr->second->createDesc(inputPrecision, getParentEdgeAt(i)->getShape().getStaticDims())); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, outFmt); - if 
(itr->first != TensorDescCreatorTypes::nspc) { + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); + if (itr->first != LayoutType::nspc) { pdIndexesToReuse.push_back(supportedPrimitiveDescriptors.size() - 1); } } @@ -161,8 +161,8 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); auto config = refConfig; - const auto& order = refConfig.outConfs[0].desc.getBlockingDesc().getOrder(); - const auto& blkDims = refConfig.outConfs[0].desc.getBlockingDesc().getBlockDims(); + const auto &order = refConfig.outConfs[0].desc->as()->getOrder(); + const auto &blkDims = refConfig.outConfs[0].desc->as()->getBlockDims(); auto numOfDim = blkDims.size(); SizeVector offsets(numOfDim, 0lu); @@ -178,17 +178,16 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { } } - config.outConfs[0].desc = TensorDesc(outputPrecision, dstDims.ToSizeVector(), {blkDims, order, offset, offsets, strides}); - memory::format_tag outFmt = MKLDNNMemoryDesc(config.outConfs[0].desc).getFormat(); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(outputPrecision, dstDims, blkDims, order, offset, offsets, strides); for (size_t i = 0; i < getParentEdges().size(); i++) { - const auto& srcBlkDims = refConfig.inConfs[i].desc.getBlockingDesc().getBlockDims(); - const auto& dims = refConfig.inConfs[i].desc.getDims(); + const auto& srcBlkDims = refConfig.inConfs[i].desc->as()->getBlockDims(); + const auto& dims = refConfig.inConfs[i].desc->getShape().getStaticDims(); config.inConfs[i].inPlace = 0; - config.inConfs[i].desc = TensorDesc(inputPrecision, dims, {srcBlkDims, order, offset, offsets, strides}); + config.inConfs[i].desc = MKLDNNPlugin::make_unique(inputPrecision, dims, srcBlkDims, order, offset, offsets, strides); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFmt); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } } @@ -210,7 +209,9 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { canOptimize = false; } - std::map formatFrequency; + std::map formatFrequency; + std::vector supportedLayouts = {LayoutType::ncsp, LayoutType::nspc, LayoutType::nCsp8c, LayoutType::nCsp16c}; + for (size_t i = 0; i < getParentEdges().size(); i++) { auto parentEdge = getParentEdgeAt(i); auto parent = parentEdge->getParent(); @@ -224,10 +225,11 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { if (outputIndex < 0 || outputIndex >= parent_config.outConfs.size()) IE_THROW() << "Cannot find index of output node"; const auto &port_desc = parent_config.outConfs[outputIndex].desc; - if (port_desc.getLayout() == Layout::ANY) - continue; - auto partial_format_desc = PartialBlkDesc::extractFrom(port_desc); - formatFrequency[partial_format_desc] += 1; + for (auto& item : supportedLayouts) { + if (port_desc->hasLayoutType(item)) { + formatFrequency[item] += 1; + } + } } for (size_t i = 0; i < getChildEdges().size(); i++) { auto childEdge = getChildEdgeAt(i); @@ -241,37 +243,47 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { if (inputIndex < 0 || inputIndex >= config.inConfs.size()) IE_THROW() << "Cannot find index of output node"; const auto &port_desc = config.inConfs[inputIndex].desc; - if (port_desc.getLayout() == Layout::ANY) - continue; - auto partial_format_desc = PartialBlkDesc::extractFrom(port_desc); - formatFrequency[partial_format_desc] += 1; + for (auto& item : supportedLayouts) { + if (port_desc->hasLayoutType(item)) { + 
formatFrequency[item] += 1; + } + } } size_t maxCount = 0; - auto outDims = getChildEdgeAt(0)->getDims().ToSizeVector(); - auto convertTo = PartialBlkDesc::makePlain(outDims); + auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); + LayoutType convertTo = LayoutType::ncsp; for (auto &it : formatFrequency) { if (it.second > maxCount) { maxCount = it.second; convertTo = it.first; } else if (it.second == maxCount) { - if (isInQuantizedGraph && it.first == PartialBlkDesc::makeTailC(outDims)) { + if (isInQuantizedGraph && it.first == LayoutType::nspc) { convertTo = it.first; - } else if (it.first == PartialBlkDesc::makeCBlocked(outDims, 8) || it.first == PartialBlkDesc::makeCBlocked(outDims, 16)) { + } else if (it.first == LayoutType::nCsp8c || it.first == LayoutType::nCsp16c) { convertTo = it.first; } } } - if (convertTo.isAutoExtendedWith(outDims)) - convertTo = PartialBlkDesc::makePlain(outDims); - for (size_t i = 0; i < getParentEdges().size(); i++) { - if (convertTo.isAutoExtendedWith(getParentEdgeAt(i)->getDims().ToSizeVector())) - convertTo = PartialBlkDesc::makePlain(outDims); + for (auto& item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c) }) { + if (convertTo == item.second) { + if (outDims[1] % item.first != 0) { + convertTo = LayoutType::ncsp; + break; + } + for (size_t i = 0; i < getParentEdges().size(); i++) { + auto& inpDims = getParentEdgeAt(i)->getShape().getStaticDims(); + if (inpDims[1] % item.first != 0) { + convertTo = LayoutType::ncsp; + break; + } + } + } } for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); ++i) { - if (PartialBlkDesc::extractFrom(supportedPrimitiveDescriptors[i].getConfig().outConfs[0].desc) == convertTo) { + if (supportedPrimitiveDescriptors[i].getConfig().outConfs[0].desc->hasLayoutType(convertTo)) { if (IMPLICATION(supportedPrimitiveDescriptors[i].getImplementationType() == impl_desc_type::unknown, canOptimize)) { canSelectPrimitive.push_back(i); } @@ -283,7 +295,7 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { return; } - // if there are more then one PD with similar data layouts - select the optimized one + // if there are more than one PD with similar data layouts - select the optimized one for (auto indx : canSelectPrimitive) { if (supportedPrimitiveDescriptors[indx].getImplementationType() == impl_desc_type::unknown) { selectPrimitiveDescriptorByIndex(static_cast(indx)); @@ -321,7 +333,7 @@ void MKLDNNConcatNode::createPrimitive() { IE_THROW() << "Preferable primitive descriptor is not set."; //check if selected Tensor descriptor has nspc layout and concat axis is C - if (axis == channelAxis && getChildEdgeAt(0)->getMemory().GetDesc().isTailCFormat()) { + if (axis == channelAxis && getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { canOptimizeNspc = true; return; } @@ -337,8 +349,8 @@ void MKLDNNConcatNode::createPrimitive() { } auto desc = srcMemPtr->GetDescriptor(); - auto dims = getParentEdgeAt(i)->getDims(); - for (size_t j = 0; j < dims.ndims(); j++) { + auto& dims = getParentEdgeAt(i)->getShape().getStaticDims(); + for (size_t j = 0; j < dims.size(); j++) { desc.data.dims[j] = dims[j]; } @@ -346,8 +358,8 @@ void MKLDNNConcatNode::createPrimitive() { } auto desc = getChildEdgeAt(0)->getMemory().GetDescriptor(); - auto dims = getChildEdgeAt(0)->getDims(); - for (size_t i = 0; i < dims.ndims(); i++) { + auto& dims = getChildEdgeAt(0)->getShape().getStaticDims(); + for (size_t i = 0; i < dims.size(); i++) { desc.data.dims[i] = dims[i]; 
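The layout selection in the selectOptimalPrimitiveDescriptor hunk above reduces to a channel-divisibility rule once the per-port LayoutType votes are counted; a condensed, standalone sketch of that rule with hypothetical helper and parameter names (not taken from the patch):

    #include <cstddef>
    #include <vector>

    // A blocked layout (nCsp8c / nCsp16c) is only kept when the concat output and
    // every input have a channel count divisible by the block size; otherwise the
    // node falls back to plain ncsp, which is what the loop over the
    // {8, nCsp8c} / {16, nCsp16c} pairs above implements.
    static bool blockedLayoutUsable(const std::vector<std::vector<std::size_t>>& inputDims,
                                    const std::vector<std::size_t>& outputDims,
                                    std::size_t blockSize) {
        if (outputDims.size() < 2 || outputDims[1] % blockSize != 0)
            return false;
        for (const auto& dims : inputDims)
            if (dims.size() < 2 || dims[1] % blockSize != 0)
                return false;
        return true;
    }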
desc.data.padded_dims[i] = dims[i]; } @@ -370,79 +382,77 @@ void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - if (!isOptimized()) { + if (!isOptimized()) { + MKLDNNNode::initOptimalPrimitiveDescriptor(); auto config = selected_pd->getConfig(); - if (!isInitConfig(config)) { + if (!isConfigDefined(config)) { for (size_t i = 0; i < config.inConfs.size(); i++) { - config.inConfs[i].desc = getConfiguredInputDesc(config, i); + config.inConfs[i].desc = getDefinedInputDesc(config, i); // Concat doesn't support different precision on inputs - config.inConfs[i].desc.setPrecision(inputPrecision); + config.inConfs[i].desc->setPrecision(inputPrecision); } for (size_t i = 0; i < config.outConfs.size(); i++) { - config.outConfs[i].desc = getConfiguredOutputDesc(config, i); - config.outConfs[i].desc.setPrecision(outputPrecision); + config.outConfs[i].desc = getDefinedOutputDesc(config, i); + config.outConfs[i].desc->setPrecision(outputPrecision); } initDescriptor(config); } - - return; } auto config = selected_pd->getConfig(); - if (isInitConfig(config)) + if (isConfigDefined(config)) return; for (size_t i = 0; i < config.outConfs.size(); i++) { - if (!isUninitTensorDesc(config.outConfs[i].desc)) + if (config.outConfs[i].desc->isDefined()) continue; int num = getChildEdgeAt(i)->getOutputNum(); if (num >= 0) { auto childConf = getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num]; - childConf.desc.setPrecision(config.outConfs[i].desc.getPrecision()); + childConf.desc->setPrecision(config.outConfs[i].desc->getPrecision()); if (getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()) { - if (isUninitTensorDesc(childConf.desc) && childConf.inPlace >= 0) + if (!childConf.desc->isDefined() && childConf.inPlace >= 0) getChildEdgeAt(i)->getChild()->initOptimalPrimitiveDescriptor(); - if (!isUninitTensorDesc(childConf.desc) && - MKLDNNExtensionUtils::initTensorsAreEqual(childConf.desc, config.outConfs[i].desc)) { - config.outConfs[i].desc = childConf.desc; + if (childConf.desc->isDefined() && childConf.desc->isCompatible(*config.outConfs[i].desc)) { + config.outConfs[i].desc = childConf.desc->clone(); continue; } } } - config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(), - config.outConfs[i].desc.getDims(), { - config.outConfs[i].desc.getBlockingDesc().getBlockDims(), - config.outConfs[i].desc.getBlockingDesc().getOrder() - }); + + // reset undefined offsets + config.outConfs[i].desc = MemoryDescUtils::resetOffset(config.outConfs[i].desc.get()); } + auto firstOutBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.outConfs[0].desc); size_t offset = 0; for (size_t i = 0; i < config.inConfs.size(); i++) { - config.inConfs[i].desc = InferenceEngine::TensorDesc(config.inConfs[i].desc.getPrecision(), - config.inConfs[i].desc.getDims(), { - config.inConfs[i].desc.getBlockingDesc().getBlockDims(), - config.inConfs[i].desc.getBlockingDesc().getOrder(), - config.outConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset, - config.outConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(), - config.outConfs[0].desc.getBlockingDesc().getStrides() - }); + auto inpBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[i].desc); + config.inConfs[i].desc = MKLDNNPlugin::make_unique(inpBlockingDesc.getPrecision(), + inpBlockingDesc.getShape().getStaticDims(), + inpBlockingDesc.getBlockDims(), + 
inpBlockingDesc.getOrder(), + firstOutBlockingDesc.getOffsetPadding() + offset, + firstOutBlockingDesc.getOffsetPaddingToData(), + firstOutBlockingDesc.getStrides()); size_t axisSize = 1; - if (config.inConfs[0].desc.getLayout() == Layout::NHWC) { - // This is more general and works for any "direct" Layout (such as nchw or nhwc), but it doesn't work for nchw8c - size_t realAxis = inverseOrder(config.inConfs[0].desc.getBlockingDesc().getOrder(), axis); - for (size_t j = realAxis; j < config.inConfs[i].desc.getBlockingDesc().getBlockDims().size(); j++) { - size_t jj = config.inConfs[0].desc.getBlockingDesc().getOrder()[j]; - axisSize *= config.inConfs[i].desc.getBlockingDesc().getBlockDims()[jj]; + auto firstInpBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[0].desc); + if (firstInpBlockingDesc.hasLayoutType(LayoutType::nspc)) { + // This is more general and works for any "direct" Layout (such as nchw or nhwc), but it doesn't work for blocked + size_t realAxis = inverseOrder(firstInpBlockingDesc.getOrder(), axis); + for (size_t j = realAxis; j < inpBlockingDesc.getBlockDims().size(); j++) { + size_t jj = firstInpBlockingDesc.getOrder()[j]; + axisSize *= inpBlockingDesc.getBlockDims()[jj]; } } else { // This works for nchw and nchw8c/nchw16c - for (size_t j = axis; j < config.inConfs[i].desc.getBlockingDesc().getBlockDims().size(); j++) { - axisSize *= config.inConfs[i].desc.getBlockingDesc().getBlockDims()[j]; + for (size_t j = axis; j < inpBlockingDesc.getBlockDims().size(); j++) { + axisSize *= inpBlockingDesc.getBlockDims()[j]; } } offset += axisSize; @@ -470,7 +480,7 @@ void MKLDNNConcatNode::execute(mkldnn::stream strm) { } InferenceEngine::Precision MKLDNNConcatNode::getRuntimePrecision() const { - return MKLDNNExtensionUtils::getMaxPrecision(getInputPrecisions()); + return getMaxPrecision(getInputPrecisions()); } void MKLDNNConcatNode::execNspcSpecCase() { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index 36de12e94d9..4bff8260c79 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -18,6 +18,7 @@ #include #include #include "common/cpu_convert.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -68,7 +69,7 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr biasesDims = { groupOC }; for (int i = 0; i < convolutionOp->get_strides().size(); i++) { - stride.push_back(static_cast(convolutionOp->get_strides()[i])); + stride.push_back(convolutionOp->get_strides()[i]); } for (int i = 0; i < convolutionOp->get_dilations().size(); i++) { dilation.push_back(static_cast(convolutionOp->get_dilations()[i]) - 1); @@ -90,7 +91,7 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr biasesDims = {groupOC * groupNum}; for (int i = 0; i < groupConvolutionOp->get_strides().size(); i++) { - stride.push_back(static_cast(groupConvolutionOp->get_strides()[i])); + stride.push_back(groupConvolutionOp->get_strides()[i]); } for (int i = 0; i < groupConvolutionOp->get_dilations().size(); i++) { dilation.push_back(static_cast(groupConvolutionOp->get_dilations()[i]) - 1); @@ -142,17 +143,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { (withBiases ? 
(getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Input) : true); } - if (isWinograd()) { - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0)); - }); - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - if (!withBiases) - return MKLDNNMemoryDesc(); - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(1)); - }); - } - withSum = false; int expectedInputEdgesNum = static_cast(getOriginalInputsNumber()); for (int i = 0; i < fusedWith.size(); i++) { @@ -169,36 +159,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { } } - // we can't convert winograd memory descriptor to TensorDesc, so we removed weight and bias edges and put data into internalBlobs - if (isWinograd()) { - std::vector edgesToRemove; - internalBlobs.push_back(createInternalBlob(weightDims, 1, isGrouped)); - edgesToRemove.push_back(getParentEdgeAt(1)); - - if (withBiases) { - internalBlobs.push_back(createInternalBlob(biasesDims, 2)); - edgesToRemove.push_back(getParentEdgeAt(2)); - } - - if (expectedInputEdgesNum - getOriginalInputsNumber() > 0) { - size_t reconnectPort = 1; - for (size_t startPort = 2 + (withBiases ? 1 : 0); startPort < expectedInputEdgesNum; startPort++) { - getParentEdgeAt(startPort)->setChildPort(reconnectPort); - reconnectPort++; - } - } - - for (size_t i = 0; i < edgesToRemove.size(); i++) { - removeEdge(edgesToRemove[i]); - } - - expectedInputEdgesNum -= getOriginalInputsNumber() - 1; - if (withBiases) { - inDims.erase(inDims.begin() + 2); - } - inDims.erase(inDims.begin() + 1); - } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); if (!inputZeroPoints.empty()) inputDataType = memory::data_type::u8; @@ -229,11 +189,12 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { } if (getParentEdges().size() != expectedInputEdgesNum) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); + IE_THROW() << "Incorrect number of input edges for layer " << getName() << ", expected: " << expectedInputEdgesNum + << " actual: " << getParentEdges().size(); if (getChildEdges().empty()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - int ndims = getParentEdgesAtPort(0)[0]->getDims().ndims(); + int ndims = getParentEdgesAtPort(0)[0]->getShape().getRank(); MKLDNNDims weightsDims = MKLDNNDims(weightDims); withDWConv = isFusedWith(Convolution); @@ -241,10 +202,14 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { for (int i = 0; i < fusedWith.size(); i++) { auto *convolutionNode = dynamic_cast(fusedWith[i].get()); if (convolutionNode) { - dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2]; - dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1]; - dw_conv_oc = convolutionNode->outDims[0][1]; - const auto &dwWeightsDims = convolutionNode->inDims[1].ToSizeVector(); + auto& inActivationDims = convolutionNode->inputShapes[0].getStaticDims(); + dw_conv_ih = inActivationDims[convolutionNode->inputShapes[0].getRank() - 2]; + dw_conv_iw = inActivationDims[convolutionNode->inputShapes[0].getRank() - 1]; + + auto& outDims = convolutionNode->outputShapes[0].getStaticDims(); + dw_conv_oc = outDims[1]; + + const auto &dwWeightsDims = convolutionNode->inputShapes[1].getStaticDims(); 
dw_conv_kernel.push_back(dwWeightsDims[dwWeightsDims.size() - 1]); dw_conv_kernel.push_back(dwWeightsDims[dwWeightsDims.size() - 2]); dw_conv_strides = convolutionNode->getStride(); @@ -262,8 +227,8 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { for (int j = 0; j < paddingR.size(); j++) { int with_group = isGrouped ? 1 : 0; int krn = weightsDims[with_group + 2 + j]; - int src = getParentEdgeAt(0)->getDims()[2 + j]; - int dst = getChildEdgeAt(0)->getDims()[2 + j]; + int src = getParentEdgeAt(0)->getShape().getStaticDims()[2 + j]; + int dst = getChildEdgeAt(0)->getShape().getStaticDims()[2 + j]; krn = (krn - 1)*(dilation[j] + 1) + 1; int calc_dst = (src - krn + paddingL[j]) / stride[j] + 1; @@ -272,18 +237,18 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { } } - MKLDNNMemoryDesc in_candidate, out_candidate; + MemoryDescPtr in_candidate, out_candidate; if (canBeExecutedInInt8()) { // We have to extend convolution_x8s8s32x from oneDNN to support BF16 output data type if (outputDataType == memory::data_type::bf16) outputDataType = memory::data_type::f32; if (eltwisePrecision == Precision::BF16) eltwisePrecision = Precision::FP32; - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ndims == 5 ? memory::format_tag::ndhwc - : memory::format_tag::nhwc); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ndims == 5 ? memory::format_tag::ndhwc - : memory::format_tag::nhwc); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), + inputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); + out_candidate = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), + outputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else { inputDataType = (getOriginalInputPrecisionAtPort(0) == Precision::BF16 && !(isDepthWise() && ndims == 5)) ? memory::data_type::bf16 : memory::data_type::f32; @@ -320,33 +285,36 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { memory::format_tag nCsp16c = ndims == 4 ? memory::format_tag::nChw16c : memory::format_tag::nCdhw16c; memory::format_tag nCsp8c = ndims == 4 ? 
memory::format_tag::nChw8c : memory::format_tag::nCdhw8c; + auto inputDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto outputDims = getChildEdgeAt(0)->getShape().getStaticDims(); + if (IC == 1 && groupOC == 1) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ncsp); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ncsp); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, ncsp); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else if (IC < 4) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ncsp); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp16c); - createDescriptor({in_candidate}, {out_candidate}); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp8c); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp16c); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp8c); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, nCsp16c); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp16c); - createDescriptor({in_candidate}, {out_candidate}); - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, nCsp8c); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp8c); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nCsp16c); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp16c); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nCsp8c); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp8c); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ncsp); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ncsp); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, ncsp); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); if (inputDataType != memory::data_type::bf16 && isNspcAvailable()) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, nspc); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nspc); - createDescriptor({in_candidate}, {out_candidate}); + in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nspc); + out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nspc); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } } } @@ -421,15 +389,18 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { continue; auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr); while 
(static_cast(itpd)) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = getSrcMemDesc(itpd, i); - if (!isGrouped) - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc); + auto srcDesc = getSrcMemDesc(itpd, i); + if (isGrouped || srcDesc->getFormatKind() != dnnl_format_kind_t::dnnl_blocked) + dataConfig.desc = std::move(srcDesc); + else + dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*srcDesc); + config.inConfs.push_back(dataConfig); } @@ -437,34 +408,38 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32); auto biasPrc = memory::data_type::f32; - MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); - MKLDNNDims dwBiasesDims({dw_conv_oc}); + std::vector dwWeightsDims({dw_conv_oc, 1, 1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); + std::vector dwBiasesDims({dw_conv_oc}); - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); + dataConfig.desc = MKLDNNPlugin::make_unique(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); config.inConfs.push_back(dataConfig); - dataConfig.desc = MKLDNNMemoryDesc(dwBiasesDims, biasPrc, memory::format_tag::x); + dataConfig.desc = MKLDNNPlugin::make_unique(dwBiasesDims, biasPrc, memory::format_tag::x); config.inConfs.push_back(dataConfig); } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; if (withSum) { dataConfig.inPlace = getParentEdges().size() - 1; } dataConfig.constant = false; - dataConfig.desc = getDstMemDesc(itpd, i); - if (!isGrouped) - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc); + + auto dstDesc = getDstMemDesc(itpd, i); + if (isGrouped || dstDesc->getFormatKind() != dnnl_format_kind_t::dnnl_blocked) + dataConfig.desc = std::move(dstDesc); + else + dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*dstDesc); + config.outConfs.push_back(dataConfig); if (withSum) { dataConfig.inPlace = -1; - dataConfig.desc.setPrecision(eltwisePrecision); + dataConfig.desc->setPrecision(eltwisePrecision); config.inConfs.push_back(dataConfig); } } @@ -505,9 +480,10 @@ bool MKLDNNConvolutionNode::created() const { return getType() == Convolution; } -void MKLDNNConvolutionNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0]; +void MKLDNNConvolutionNode::createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) { + auto inDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); + auto outDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); memory::data_type bdt = memory::data_type::f32; @@ -516,12 +492,9 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector algorithms; @@ -533,17 +506,17 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector conv_desc; if (withBiases) { - 
MKLDNNMemoryDesc bias_candidate{blocked_biasesDims, bdt, memory::format_tag::any}; + mkldnn::memory::desc bias_candidate(blocked_biasesDims, bdt, memory::format_tag::any); conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg, - in_candidate, wgh_candidate, bias_candidate, out_candidate, + inDesc, wgh_candidate, bias_candidate, outDesc, mkldnn::memory::dims(stride.begin(), stride.end()), mkldnn::memory::dims(dilation.begin(), dilation.end()), mkldnn::memory::dims(paddingL.begin(), paddingL.end()), mkldnn::memory::dims(paddingR.begin(), paddingR.end()))); } else { conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg, - in_candidate, wgh_candidate, out_candidate, + inDesc, wgh_candidate, outDesc, mkldnn::memory::dims(stride.begin(), stride.end()), mkldnn::memory::dims(dilation.begin(), dilation.end()), mkldnn::memory::dims(paddingL.begin(), paddingL.end()), @@ -569,7 +542,7 @@ void MKLDNNConvolutionNode::addZeroPoints(mkldnn::primitive_attr& attr) const { } } -void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& config) { +void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) { auto* selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) { return; @@ -589,14 +562,14 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c // } if (isStridedBlobsSupported) { - createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc}); + createDescriptor({config.inConfs[0].desc.get()}, {config.outConfs[0].desc.get()}); } mkldnn::primitive_attr attr; addZeroPoints(attr); setPostOps(attr); - InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig(); + auto rightConfig = selectedPD->getConfig(); size_t selected_count = 0; bool containJitImpl = false; @@ -607,10 +580,10 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c continue; auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr); while (static_cast(itpd)) { - InferenceEngine::LayerConfig cfg; + NodeConfig cfg; cfg.dynBatchSupport = true; for (size_t j = 0; j < descInputNumbers(desc); j++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; dataConfig.desc = getSrcMemDesc(itpd, j); @@ -621,27 +594,27 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? 
Precision::I8 : Precision::FP32); auto biasPrc = memory::data_type::f32; - MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); - MKLDNNDims dwBiasesDims({dw_conv_oc}); + std::vector dwWeightsDims({dw_conv_oc, 1, 1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); + std::vector dwBiasesDims({dw_conv_oc}); - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); + dataConfig.desc = MKLDNNPlugin::make_unique(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); cfg.inConfs.push_back(dataConfig); - dataConfig.desc = MKLDNNMemoryDesc(dwBiasesDims, biasPrc, memory::format_tag::x); + dataConfig.desc = MKLDNNPlugin::make_unique(dwBiasesDims, biasPrc, memory::format_tag::x); cfg.inConfs.push_back(dataConfig); } for (size_t j = 0; j < descOutputNumbers(desc); j++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; dataConfig.desc = getDstMemDesc(itpd, j); if (withSum) { auto eltwiseConfig = dataConfig; - eltwiseConfig.desc.setPrecision(eltwisePrecision); + eltwiseConfig.desc->setPrecision(eltwisePrecision); cfg.inConfs.push_back(eltwiseConfig); dataConfig.inPlace = getParentEdges().size() - 1; } @@ -668,7 +641,7 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c break; } } - selectedPD->getConfig() = rightConfig; + selectedPD->setConfig(rightConfig); } void MKLDNNConvolutionNode::filterSupportedPrimitiveDescriptors() { @@ -729,44 +702,17 @@ bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) c auto dstMemDesc = MKLDNNMemoryDesc {convDesc->data.dst_desc}; auto srcDataType = convDesc->data.src_desc.data_type; auto dstDataType = convDesc->data.dst_desc.data_type; - bool isPlanarFloatConv = srcMemDesc.isPlainFormat() - && dstMemDesc.isPlainFormat() + bool isPlanarFloatConv = srcMemDesc.hasLayoutType(LayoutType::ncsp) + && dstMemDesc.hasLayoutType(LayoutType::ncsp) && srcDataType == memory::data_type::f32 && dstDataType == memory::data_type::f32; return !isPossibleJitPlanar && isPlanarFloatConv; } -MKLDNNMemoryDesc MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = idx > 0 ? 
MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) - : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); - - if (desc.getLayout() == InferenceEngine::Layout::ANY) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - } else { - if (getParentEdgeAt(idx)->getDims().ToSizeVector().size() != *std::max_element(desc.getBlockingDesc().getOrder().begin(), - desc.getBlockingDesc().getOrder().end()) + 1) { - auto old_dims = getParentEdgeAt(idx)->getDims().ToSizeVector(); - auto new_dims = InferenceEngine::SizeVector({groupNum, div_up(old_dims[0], groupNum)}); - for (int i = 1; i < old_dims.size(); i++) { - new_dims.push_back(old_dims[i]); - } - - auto td = InferenceEngine::TensorDesc(desc.getPrecision(), - new_dims, - desc.getBlockingDesc()); - if (new_dims.size() == desc.getBlockingDesc().getBlockDims().size()) { - td.setLayout(BLOCKED); - } - return MKLDNNMemoryDesc(td); - } else { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); - } - } +std::unique_ptr MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); + return MKLDNNPlugin::make_unique(std::move(desc)); } bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { @@ -774,11 +720,11 @@ bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { } const mkldnn::memory& MKLDNNConvolutionNode::getWeights() const { - return isWinograd() ? internalBlobMemory[0]->GetPrimitive() : getParentEdgeAt(1)->getMemory().GetPrimitive(); + return getParentEdgeAt(1)->getMemory().GetPrimitive(); } const mkldnn::memory& MKLDNNConvolutionNode::getBias() const { - return isWinograd() ? 
internalBlobMemory[1]->GetPrimitive() : getParentEdgeAt(2)->getMemory().GetPrimitive(); + return getParentEdgeAt(2)->getMemory().GetPrimitive(); } InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const { @@ -792,7 +738,7 @@ InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const { } } - return MKLDNNExtensionUtils::getMaxPrecision(inputPrecisions); + return getMaxPrecision(inputPrecisions); } bool MKLDNNConvolutionNode::isNspcAvailable() const { @@ -809,8 +755,8 @@ bool MKLDNNConvolutionNode::isNspcAvailable() const { } // A bunch of heuristics are designed to cut off not optimal nspc convolution applications - auto inpDims = getParentEdgeAt(0)->getDims().ToSizeVector(); - auto outDims = getChildEdgeAt(0)->getDims().ToSizeVector(); + auto inpDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); auto ndims = inpDims.size(); if (isDepthWise()) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h index 79b4aef029e..7fa5ed80bb8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h @@ -20,9 +20,9 @@ public: static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; - void initDescriptor(const InferenceEngine::LayerConfig& config) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; + void initDescriptor(const NodeConfig& config) override; void createPrimitive() override; void selectOptimalPrimitiveDescriptor() override; void initSupportedPrimitiveDescriptors() override; @@ -32,13 +32,13 @@ public: return false; } InferenceEngine::Precision getRuntimePrecision() const override; - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; const mkldnn::memory& getWeights() const; const mkldnn::memory& getBias() const; size_t descInputNumbers(MKLDNNDescriptor desc) override { - return static_cast(isWinograd() ? 
1 : getOriginalInputsNumber()); + return getOriginalInputsNumber(); } bool canBeExecutedInInt8() const; @@ -49,7 +49,7 @@ public: std::vector outputCompensation; const InferenceEngine::SizeVector &getWeightDims() { return weightDims; } - const std::vector &getStride() { return stride; } + const std::vector &getStride() { return stride; } const std::vector &getDilation() { return dilation; } const std::vector &getPaddingL() { return paddingL; } const std::vector &getPaddingR() { return paddingR; } @@ -77,18 +77,18 @@ private: bool withDWConv; bool isGrouped; bool isPrimitivesPriorityDefined = false; - std::vector stride; + std::vector stride; std::vector dilation; std::vector paddingL; std::vector paddingR; InferenceEngine::SizeVector weightDims; InferenceEngine::SizeVector biasesDims; - ptrdiff_t dw_conv_oc; - ptrdiff_t dw_conv_ih; - ptrdiff_t dw_conv_iw; - std::vector dw_conv_kernel; - std::vector dw_conv_strides; + size_t dw_conv_oc; + size_t dw_conv_ih; + size_t dw_conv_iw; + std::vector dw_conv_kernel; + std::vector dw_conv_strides; mkldnn::memory::data_type dw_conv_in_dt; size_t groupNum; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp index 678922f3a4b..00a403c8bb6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp @@ -5,7 +5,7 @@ #include #include "mkldnn_convert_node.h" #include "common/cpu_convert.h" -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include using namespace mkldnn; @@ -38,9 +38,9 @@ MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr& op, co MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc, const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode("Convert", nodeName, eng, cache) { - inDims.emplace_back(dims); + inputShapes.emplace_back(dims); addOriginalInputPrecision(inPrc); - outDims.emplace_back(dims); + outputShapes.emplace_back(dims); addOriginalOutputPrecision(outPrc); errorPrefix = "Convert node with name '" + getName() + "'"; @@ -49,10 +49,10 @@ MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, co void MKLDNNConvertNode::getSupportedDescriptors() { // if tensor descriptors are set via setDescs method we need to update the inDims/outDims data // from correspond tensor descriptors. 
- if (outDims.empty() && output && output->getLayout() != InferenceEngine::Layout::ANY) - outDims.push_back(MKLDNNDims(output->getDims())); - if (inDims.empty() && input && input->getLayout() != InferenceEngine::Layout::ANY) - inDims.push_back(MKLDNNDims(input->getDims())); + if (outputShapes.empty()) + outputShapes.push_back(output->getShape()); + if (inputShapes.empty()) + inputShapes.push_back(input->getShape()); if (getParentEdges().size() != 1) IE_THROW() << errorPrefix << " has incorrect number of input edges"; if (getChildEdges().empty()) @@ -63,39 +63,40 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - LayerConfig config; - DataConfig dataIn; - DataConfig dataConfigOut; + NodeConfig config; + PortConfig dataIn; + PortConfig dataConfigOut; config.dynBatchSupport = false; // if input and output pointers are not null, then the inp/output tensor descriptors were set using setDescs method, so // they should be used as the actual descriptors. - if (input && input->getLayout() != InferenceEngine::Layout::ANY && output && output->getLayout() != InferenceEngine::Layout::ANY) { - dataIn.desc = *input; + if (input && output) { + dataIn.desc = input->clone(); config.inConfs.push_back(dataIn); - const auto& blockingDesc = config.inConfs[0].desc.getBlockingDesc(); // inp/out layouts must be the same - dataConfigOut.desc = TensorDesc(output->getPrecision(), input->getDims(), blockingDesc); + // inp/out layouts must be the same + dataConfigOut.desc = config.inConfs[0].desc->clone(); + dataConfigOut.desc->setPrecision(output->getPrecision()); config.outConfs.push_back(dataConfigOut); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } else if (getOriginalInputsNumber() == 1 && getOriginalOutputsNumber() == 1) { - const SizeVector& insDims = getParentEdgeAt(0)->getDims().ToSizeVector(); + const Shape& insShape = getParentEdgeAt(0)->getShape(); auto insPrecision = getOriginalInputPrecisionAtPort(0); - const SizeVector& outputDims = getChildEdgeAt(0)->getDims().ToSizeVector(); + const Shape& outputShape = getChildEdgeAt(0)->getShape(); auto outPrecision = getOriginalOutputPrecisionAtPort(0); config.inConfs.push_back(dataIn); config.outConfs.push_back(dataConfigOut); - auto creators = TensorDescCreator::getCommonCreators(); - auto range = TensorDescCreator::makeFilteredRange(creators, insDims.size()); + auto creators = BlockedDescCreator::getCommonCreators(); + auto range = BlockedDescCreator::makeFilteredRange(creators, insShape.getRank()); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createDesc(insPrecision, insDims); - config.outConfs[0].desc = itr->second->createDesc(outPrecision, outputDims); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(insPrecision, insShape.getDims())); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(outPrecision, outputShape.getDims())); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } } else { IE_THROW() << errorPrefix << " has incorrect number of input/output edges"; @@ -121,7 +122,7 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) { void* srcPtr = 
parentMem.GetPtr(); void* dstPtr = childMem.GetPtr(); - cpu_convert(srcPtr, dstPtr, getParentEdgeAt(0)->getDesc().getPrecision(), getChildEdgeAt(0)->getDesc().getPrecision(), parentMem.GetElementsCount()); + cpu_convert(srcPtr, dstPtr, parentMem.GetDesc().getPrecision(), childMem.GetDesc().getPrecision(), parentMem.GetElementsCount()); } bool MKLDNNConvertNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h index ca43bb3db5c..38707385f7a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h @@ -30,19 +30,19 @@ public: // In that case the Convert node is instantiated with default CNNLayer and inp/out tensor descriptors are set via this method. // This is useful if the Convert node is added to the graph as an auxiliary operation at the MKLDNNGraph // initialization stage. - void setDescs(const InferenceEngine::TensorDesc& input, const InferenceEngine::TensorDesc& output) { - this->input.reset(new InferenceEngine::TensorDesc(input)); - this->output.reset(new InferenceEngine::TensorDesc(output)); + void setDescs(const MemoryDesc& input, const MemoryDesc& output) { + this->input = input.clone(); + this->output = output.clone(); } - std::shared_ptr getInput() const { return input; } - std::shared_ptr getOutput() const { return output; } + const MemoryDesc& getInput() const { return *input; } + const MemoryDesc& getOutput() const { return *output; } static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: - std::shared_ptr input; - std::shared_ptr output; + std::unique_ptr input; + std::unique_ptr output; std::string errorPrefix; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp index 34c9aaf191e..2bf514fffda 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -59,9 +58,9 @@ void MKLDNNCTCGreedyDecoderNode::initSupportedPrimitiveDescriptors() { if (seqLenPrecision != Precision::FP32 && seqLenPrecision != Precision::BF16) IE_THROW() << errorPrefix << "has unsupported 'sequence_length' input precision: " << seqLenPrecision; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -70,9 +69,9 @@ void MKLDNNCTCGreedyDecoderNode::execute(mkldnn::stream strm) { const float* sequenceMask = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetPtr()); float* outputSequences = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const size_t T = getParentEdgeAt(DATA_INDEX)->getDims()[0]; - const size_t B = getParentEdgeAt(DATA_INDEX)->getDims()[1]; - const int C = getParentEdgeAt(DATA_INDEX)->getDims()[2]; + const size_t T = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[0]; + const size_t B = 
getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[1]; + const int C = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[2]; const size_t BC = B * C; const size_t CB1 = C * (B - 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp index 0eccdbfa1b5..acd273a9ad9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -58,15 +57,15 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::initSupportedPrimitiveDescriptors() { if (seqLenPrecision != Precision::I32 && seqLenPrecision != Precision::I64) IE_THROW() << errorPrefix << "has unsupported 'sequence_length' input precision: " << seqLenPrecision; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); for (int i = 1; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::I32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::I32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, + {{LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, impl_desc_type::ref_any); } @@ -76,13 +75,13 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::execute(mkldnn::stream strm) { int* decodedClasses = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getMemoryPtr()->GetPtr()); int* decodedClassesLength = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_LENGTH_INDEX)[0]->getMemoryPtr()->GetPtr()); - const size_t B = getParentEdgeAt(DATA_INDEX)->getDims()[0];; - const size_t T = getParentEdgeAt(DATA_INDEX)->getDims()[1];; - const int C = getParentEdgeAt(DATA_INDEX)->getDims()[2];; + const size_t B = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[0];; + const size_t T = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[1];; + const int C = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[2];; const size_t TC = T * C; int blankIndex = C - 1; - if (inDims.size() > BLANK_INDEX) + if (inputShapes.size() > BLANK_INDEX) blankIndex = (reinterpret_cast(getParentEdgeAt(BLANK_INDEX)->getMemoryPtr()->GetPtr()))[0]; size_t workAmount = 0; @@ -91,7 +90,7 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::execute(mkldnn::stream strm) { std::string errorMsg = errorPrefix + ". 
Sequence length " + std::to_string(sequenceLengths[b]) + " cannot be greater than according decoded classes dimension size " - + std::to_string(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getDims()[1]); + + std::to_string(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getShape().getStaticDims()[1]); IE_THROW() << errorMsg; } workAmount += sequenceLengths[b]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp index b355dcaefcd..47da0501486 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp @@ -46,14 +46,14 @@ void MKLDNNCTCLossNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); for (int i = 1; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::I32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::I32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -66,12 +66,12 @@ void MKLDNNCTCLossNode::execute(mkldnn::stream strm) { const int* labelsLength = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetPtr()); float* dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const size_t batchNum = getParentEdgeAt(0)->getDims()[0]; - const size_t maxTime = getParentEdgeAt(0)->getDims()[1]; - const size_t classesNum = getParentEdgeAt(0)->getDims()[2]; + const size_t batchNum = getParentEdgeAt(0)->getShape().getStaticDims()[0]; + const size_t maxTime = getParentEdgeAt(0)->getShape().getStaticDims()[1]; + const size_t classesNum = getParentEdgeAt(0)->getShape().getStaticDims()[2]; int blankIndex = classesNum - 1; - if (inDims.size() > 4) { + if (inputShapes.size() > 4) { blankIndex = reinterpret_cast(getParentEdgeAt(4)->getMemoryPtr()->GetPtr())[0]; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp index 3f6c8f90348..5124409cf8b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // #include "list.hpp" -#include "base.hpp" #include #include @@ -78,20 +77,20 @@ void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() { IE_THROW() << errorPrefix << " has unsupported 'axis' input precision: " << axisTensorPrec.name(); } - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, dataPrecision); + inDataConf.emplace_back(LayoutType::ncsp, dataPrecision); for (int i = 1; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::I32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::I32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + {{LayoutType::ncsp, dataPrecision}}, impl_desc_type::ref_any); } void MKLDNNCumSumNode::execute(mkldnn::stream strm) { - if (inDims.size() == numOfInputs) - axis = 
getAxis(getParentEdgeAt(AXIS)->getBlob(), getParentEdgeAt(CUM_SUM_DATA)->getBlob()); + if (inputShapes.size() == numOfInputs) + axis = getAxis(getParentEdgeAt(AXIS)->getMemory(), getParentEdgeAt(CUM_SUM_DATA)->getMemory()); switch (dataPrecision) { case Precision::I8 : { @@ -134,7 +133,7 @@ template void MKLDNNCumSumNode::exec() { const auto *input = reinterpret_cast(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr()); auto *output = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const std::vector strides = getParentEdgeAt(CUM_SUM_DATA)->getDesc().getBlockingDesc().getStrides(); + const std::vector strides = getParentEdgeAt(CUM_SUM_DATA)->getMemory().GetDescWithType().getStrides(); if (reverse) { if (exclusive) { @@ -248,18 +247,18 @@ inline size_t MKLDNNCumSumNode::getStartOffset(const std::vector &forSta return startOffset; } -size_t MKLDNNCumSumNode::getAxis(const Blob::CPtr& _axis, const Blob::CPtr& _data) const { - const auto& axisPrecision = _axis->getTensorDesc().getPrecision(); - const int64_t dataShapeSize = static_cast(_data->getTensorDesc().getDims().size()); +size_t MKLDNNCumSumNode::getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory& _data) const { + const auto& axisPrecision = _axis.GetDesc().getPrecision(); + const int64_t dataShapeSize = static_cast(_data.GetDesc().getShape().getRank()); int64_t axisValueFromBlob; switch (axisPrecision) { case Precision::I32 : { - const auto *axisPtr = _axis->cbuffer().as(); + const auto *axisPtr = reinterpret_cast(_axis.GetPtr()); axisValueFromBlob = static_cast(axisPtr[0]); break; } case Precision::I64 : { - const auto *axisPtr = _axis->cbuffer().as(); + const auto *axisPtr = reinterpret_cast(_axis.GetPtr()); axisValueFromBlob = axisPtr[0]; break; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h index 794d6bc73f1..bbe180f5544 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h @@ -34,7 +34,7 @@ private: inline size_t getStartOffset(const std::vector &forStartOffset, const std::vector& strides) const; - size_t getAxis(const InferenceEngine::Blob::CPtr& _axis, const InferenceEngine::Blob::CPtr& _data) const; + size_t getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory& _data) const; enum { CUM_SUM_DATA, AXIS, numOfInputs }; bool exclusive; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index ba44392e66f..62c173c72f5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -16,6 +16,7 @@ #include #include #include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -108,10 +109,10 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE // So we disconnect constant node containing weights from the graph and then don't use it. 
if (getParentEdges().size() == 3) { removeEdge(getParentEdgeAt(2)); - inDims.erase(inDims.begin() + 2); + inputShapes.erase(inputShapes.begin() + 2); } removeEdge(getParentEdgeAt(1)); - inDims.erase(inDims.begin() + 1); + inputShapes.erase(inputShapes.begin() + 1); InferenceEngine::SizeVector dimsForBlockedDesc{dims}; std::swap(dimsForBlockedDesc[withGroups + 0], dimsForBlockedDesc[withGroups + 1]); @@ -151,7 +152,7 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const { if (!withGroups && stride.back() > 3) return false; if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) { - auto inDims = getChildEdgeAt(0)->getDims().ToSizeVector(); + auto inDims = getChildEdgeAt(0)->getShape().getStaticDims(); // heuristicConst = 2^26 // heuristicParam = IC^2 * SP auto heuristicConst = 67108864; @@ -230,8 +231,8 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { for (int i = 0; i < paddingR.size(); i++) { int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0; int krn = weightDims[with_group + 2 + i]; - int src = getChildEdgeAt(0)->getDims()[2 + i]; - int dst = getParentEdgeAt(0)->getDims()[2 + i]; + int src = getChildEdgeAt(0)->getShape().getStaticDims()[2 + i]; + int dst = getParentEdgeAt(0)->getShape().getStaticDims()[2 + i]; krn = (krn - 1)*(dilation[i] + 1) + 1; int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1; @@ -242,15 +243,15 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { // WA: if int8 deconvolution is supported, we create internal weights blob in IO format std::swap(weightDims[withGroups + 0], weightDims[withGroups + 1]); internalBlobs.push_back(createWeiBlobAsIO(weightDims)); - auto format = getParentEdgeAt(0)->getDims().ndims() == 5 ? dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc; - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, format); - MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType, format); - createDescriptor({in_candidate}, {out_candidate}); + auto format = getParentEdgeAt(0)->getShape().getRank() == 5 ? 
dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc; + MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); + MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); + createDescriptor({&in_candidate}, {&out_candidate}); } else { - for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getDims())) { - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, format); - MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType, format); - createDescriptor({in_candidate}, {out_candidate}); + for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { + MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); + MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); + createDescriptor({&in_candidate}, {&out_candidate}); } } setPostOps(attr); @@ -346,10 +347,10 @@ void MKLDNNDeconvolutionNode::createPrimitive() { } } -void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - MKLDNNMemoryDesc in_candidate(inputDesc[0]); - MKLDNNMemoryDesc out_candidate(outputDesc[0]); +void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + const MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); + const MKLDNNMemoryDesc out_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); // grouping and autoblicking is not compatible if ((withGroups && !isDW) && (in_candidate.blocksExtended() || out_candidate.blocksExtended())) @@ -361,7 +362,7 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector deconv_desc; deconv_desc.reset(new deconvolution_forward::desc(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct, in_candidate, wgh_candidate, out_candidate, @@ -370,7 +371,7 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector conv_desc; conv_desc.reset(new convolution_forward::desc(prop_kind::forward_inference, alg, @@ -399,52 +400,21 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx == 2) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(getOriginalInputPrecisionAtPort(2), - getParentEdgeAt(2)->getDims().ToSizeVector(), - TensorDesc::getLayoutByDims(getParentEdgeAt(2)->getDims().ToSizeVector()))); + auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(2)); + return MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), dataType, + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(2)->getShape().getRank())); } - InferenceEngine::TensorDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) + MKLDNNMemoryDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : isInt8 ? 
MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)) : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_desc(idx)); - - if (desc.getLayout() == InferenceEngine::Layout::ANY) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - } else { - if (getParentEdgeAt(idx)->getDims().ToSizeVector().size() != *std::max_element(desc.getBlockingDesc().getOrder().begin(), - desc.getBlockingDesc().getOrder().end()) + 1) { - auto old_dims = getParentEdgeAt(idx)->getDims().ToSizeVector(); - auto new_dims = weightDims; - - auto td = InferenceEngine::TensorDesc(desc.getPrecision(), - new_dims, - desc.getBlockingDesc()); - if (new_dims.size() == desc.getBlockingDesc().getBlockDims().size()) { - td.setLayout(BLOCKED); - } - return MKLDNNMemoryDesc(td); - } else { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); - } - } + return MKLDNNPlugin::make_unique(std::move(desc)); } -MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = isInt8 ? MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)) - : MKLDNNMemoryDesc(primitive_desc_it.diff_src_desc(idx)); - if (desc.getLayout() == InferenceEngine::Layout::ANY) - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - else - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); +std::unique_ptr MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + return isInt8 ? 
MKLDNNPlugin::make_unique(primitive_desc_it.dst_desc(idx)) : + MKLDNNPlugin::make_unique(primitive_desc_it.diff_src_desc(idx)); } InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const { @@ -458,7 +428,7 @@ InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const } } - return MKLDNNExtensionUtils::getMaxPrecision(inputPrecisions); + return getMaxPrecision(inputPrecisions); } REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h index f3f47c83a9f..15ee71d6af7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h @@ -17,8 +17,8 @@ public: MKLDNNDeconvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void getSupportedDescriptors() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void createPrimitive() override; void filterSupportedPrimitiveDescriptors() override; void filterSupportedDescriptors(); @@ -31,8 +31,8 @@ public: return static_cast(getParentEdges().size()); } - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; InferenceEngine::Precision getRuntimePrecision() const override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp index a2fae182a52..4d29550eda0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp @@ -785,20 +785,20 @@ void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges"; - if (getParentEdgeAt(0)->getDims().ndims() != 4) { + if (getParentEdgeAt(0)->getShape().getRank() != 4) { IE_THROW() << "Deformable convolution layer. Unsupported mode. 
Only 4D blobs are supported as input."; } - if (getParentEdgeAt(1)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); + if (getParentEdgeAt(1)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); } - if (getParentEdgeAt(2)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getDims().ndims(); + if (getParentEdgeAt(2)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getShape().getRank(); } - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); + if (getChildEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); } } @@ -806,7 +806,7 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(3); config.inConfs[0].constant = false; @@ -838,20 +838,26 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { auto weiFormat = group > 1 ? mayiuse(avx512_common) ? memory::format_tag::gOIhw16i16o : memory::format_tag::gOIhw8i8o : mayiuse(avx512_common) ? memory::format_tag::OIhw16i16o : memory::format_tag::OIhw8i8o; - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), memory::data_type::f32, dataFormat); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::f32, offFormat); - config.inConfs[2].desc = MKLDNNMemoryDesc(getParentEdgeAt(2)->getDims(), memory::data_type::f32, weiFormat); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), memory::data_type::f32, dataFormat); - supportedPrimitiveDescriptors.push_back({config, impl_type, dataFormat}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), + memory::data_type::f32, dataFormat); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), + memory::data_type::f32, offFormat); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), + memory::data_type::f32, weiFormat); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), + memory::data_type::f32, dataFormat); + supportedPrimitiveDescriptors.push_back({config, impl_type}); } else { // reference implementation - auto weiFormat = group > 1 ? 
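// The deformable-convolution checks above move from edge->getDims().ndims() to
// edge->getShape().getRank(). A stand-alone sketch of that style of rank validation follows;
// Shape and checkRankIs4 are illustrative stand-ins, not the plugin's shape class.
#include <cstddef>
#include <sstream>
#include <stdexcept>
#include <vector>

class Shape {
public:
    explicit Shape(std::vector<size_t> dims) : dims_(std::move(dims)) {}
    size_t getRank() const { return dims_.size(); }
    const std::vector<size_t>& getStaticDims() const { return dims_; }
private:
    std::vector<size_t> dims_;
};

void checkRankIs4(const Shape& shape, const char* what) {
    if (shape.getRank() != 4) {
        std::ostringstream msg;
        msg << what << " is expected to be 4D but has rank " << shape.getRank();
        throw std::runtime_error(msg.str());
    }
}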
memory::format_tag::goihw : memory::format_tag::oihw; - - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), memory::data_type::f32, memory::format_tag::nchw); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::f32, memory::format_tag::nchw); - config.inConfs[2].desc = MKLDNNMemoryDesc(getParentEdgeAt(2)->getDims(), memory::data_type::f32, memory::format_tag::oihw); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), memory::data_type::f32, memory::format_tag::nchw); - supportedPrimitiveDescriptors.push_back({config, impl_type, weiFormat}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::nchw); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::nchw); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::oihw); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::nchw); + supportedPrimitiveDescriptors.push_back({config, impl_type}); } } @@ -861,9 +867,9 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() { IE_THROW() << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); - auto srcDims = config.inConfs[0].desc.getDims(); - auto weiDims = config.inConfs[2].desc.getDims(); - auto dstDims = config.outConfs[0].desc.getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto weiDims = getParentEdgeAt(2)->getShape().getStaticDims(); + auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); jcp.dg = deformable_group; @@ -1062,25 +1068,20 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { const auto *weights = reinterpret_cast(srcMemory2.GetPtr()); float *dst = reinterpret_cast(dstMemory.GetPtr()); - auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); - if (!selectedPrimitiveDescriptor) - IE_THROW() << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors."; - auto config = selectedPrimitiveDescriptor->getConfig(); - - auto src_block_desc = config.inConfs[0].desc.getBlockingDesc(); + auto src_block_desc = getParentEdgeAt(0)->getMemory().GetDescWithType(); std::vector src_strides(src_block_desc.getStrides().size()); for (int i = 0; i < src_strides.size(); i++) { src_strides[src_block_desc.getOrder()[i]] = src_block_desc.getStrides()[i]; } - auto dst_block_desc = config.outConfs[0].desc.getBlockingDesc(); + auto dst_block_desc = getChildEdgeAt(0)->getMemory().GetDescWithType(); std::vector dst_strides(dst_block_desc.getStrides().size()); for (int i = 0; i < dst_strides.size(); i++) { dst_strides[dst_block_desc.getOrder()[i]] = dst_block_desc.getStrides()[i]; } - auto off_strides = config.inConfs[1].desc.getBlockingDesc().getStrides(); - auto wei_strides = config.inConfs[2].desc.getBlockingDesc().getStrides(); + auto off_strides = getParentEdgeAt(1)->getMemory().GetDescWithType().getStrides(); + auto wei_strides = getParentEdgeAt(2)->getMemory().GetDescWithType().getStrides(); if (def_conv_kernel) { executeOptimized(src, offsets, weights, dst, src_strides, off_strides, dst_strides); @@ -1094,7 +1095,7 @@ bool 
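// In the execute() hunk above, per-block strides are scattered back into logical-dimension
// order through the blocking order (src_strides[order[i]] = strides[i]). A self-contained
// sketch of that reordering, assuming order and blockStrides have equal length and order is
// a plain permutation of 0..N-1 (the real blocked case can carry extra block dimensions).
#include <cstddef>
#include <vector>

std::vector<size_t> stridesInLogicalOrder(const std::vector<size_t>& order,
                                          const std::vector<size_t>& blockStrides) {
    std::vector<size_t> logical(order.size(), 0);
    for (size_t i = 0; i < order.size(); ++i) {
        logical[order[i]] = blockStrides[i];  // i-th stored stride belongs to logical dim order[i]
    }
    return logical;
}
// Example: order {0, 2, 3, 1} with stored strides {S0, S1, S2, S3} yields
// {S0, S3, S1, S2}, i.e. strides indexed by the logical dims N, C, H, W.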
MKLDNNDeformableConvolutionNode::created() const { } InferenceEngine::Precision MKLDNNDeformableConvolutionNode::getRuntimePrecision() const { - return MKLDNNExtensionUtils::getMaxPrecision(getInputPrecisions()); + return getMaxPrecision(getInputPrecisions()); } REG_MKLDNN_PRIM_FOR(MKLDNNDeformableConvolutionNode, DeformableConvolution); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp index 38bebcd5271..a117d3acbdc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp @@ -6,7 +6,7 @@ #include #include -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include #include @@ -58,7 +58,7 @@ MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr(std::pow(blockSize, nSpatialDims)); } else { IE_THROW(NotImplemented) << errorMessage; @@ -66,13 +66,13 @@ MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr 5) THROW_ERROR << "doesn't support dimensions with rank greater than 5"; - SizeVector dstDims = outDims[0].ToSizeVector(); + SizeVector dstDims = outputShapes[0].getStaticDims(); if (srcDims.size() != dstDims.size()) THROW_ERROR << "has incorrect number of input/output dimensions"; @@ -99,8 +99,8 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { return; InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); - auto srcDims = getParentEdgeAt(0)->getDims(); - const size_t nDims = srcDims.ndims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + const size_t nDims = srcDims.size(); impl_desc_type impl_type; if (mayiuse(impl::cpu::x64::avx512_common)) { @@ -113,7 +113,7 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.outConfs.resize(1); @@ -122,27 +122,27 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - std::vector supportedTypes; + std::vector supportedTypes; if (nDims > 2) { auto canUseBlocked = [=](const size_t block) { return srcDims[1] % block == 0 && (srcDims[1] / block) % blockStep == 0 && (mode == Mode::DEPTH_FIRST ? 
block % blockStep == 0 : true); }; - supportedTypes.push_back(TensorDescCreatorTypes::nspc); + supportedTypes.push_back(LayoutType::nspc); if (canUseBlocked(8lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp8c); + supportedTypes.push_back(LayoutType::nCsp8c); if (canUseBlocked(16lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp16c); + supportedTypes.push_back(LayoutType::nCsp16c); } - supportedTypes.push_back(TensorDescCreatorTypes::ncsp); - auto creators = TensorDescCreator::getCommonCreators(); - auto range = TensorDescCreator::makeFilteredRange(creators, nDims, supportedTypes); + supportedTypes.push_back(LayoutType::ncsp); + auto creators = BlockedDescCreator::getCommonCreators(); + auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createDesc(precision, getParentEdgeAt(0)->getDims().ToSizeVector()); - config.outConfs[0].desc = itr->second->createDesc(precision, getChildEdgeAt(0)->getDims().ToSizeVector()); - supportedPrimitiveDescriptors.emplace_back(config, impl_type, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + config.inConfs[0].desc = itr->second->createUniqueDesc(precision, getParentEdgeAt(0)->getShape().getStaticDims()); + config.outConfs[0].desc = itr->second->createUniqueDesc(precision, getChildEdgeAt(0)->getShape().getStaticDims()); + supportedPrimitiveDescriptors.emplace_back(config, impl_type); } } @@ -156,18 +156,19 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "has unidentified preferable primitive descriptor"; - SizeVector srcDims = getParentEdgeAt(0)->getBlob()->getTensorDesc().getDims(); - SizeVector dstDims = getChildEdgeAt(0)->getBlob()->getTensorDesc().getDims(); + SizeVector srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + SizeVector dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); size_t nDims = srcDims.size(); const size_t nSpatialDims = nDims - 2; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat(); + const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); const size_t reshapedRank = nDims + nSpatialDims + static_cast(isBlocked) + static_cast(isBlocked && mode == Mode::DEPTH_FIRST); const size_t lastIdx = reshapedRank - 1; size_t firstSpatialOrder = 2; PermuteParams params; - params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); params.order.resize(reshapedRank, 0); params.src_block_order.resize(reshapedRank); params.dst_block_order.resize(reshapedRank); @@ -193,8 +194,8 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { }; if (isBlocked) { - SizeVector srcBlockedDims = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); - SizeVector dstBlockedDims = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); + SizeVector srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + SizeVector dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); size_t orderShiftForBlocks, orderShiftForDims; if (mode == Mode::BLOCKS_FIRST) { @@ -223,7 +224,7 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { } 
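// The depth-to-space hunk above selects candidate layouts (nspc, nCsp8c, nCsp16c, ncsp) based
// on whether the channel count divides evenly by the block size and the depth-to-space block
// step. A simplified stand-alone sketch of that selection, assuming blockStep >= 1; Layout and
// candidateLayouts are illustrative stand-ins, not the plugin's LayoutType machinery.
#include <cstddef>
#include <vector>

enum class Layout { Planar, ChannelsLast, Blocked8, Blocked16 };

std::vector<Layout> candidateLayouts(size_t channels, size_t blockStep) {
    auto blockedUsable = [&](size_t block) {
        return channels % block == 0 && (channels / block) % blockStep == 0;
    };
    std::vector<Layout> layouts{Layout::ChannelsLast};
    if (blockedUsable(8))  layouts.push_back(Layout::Blocked8);
    if (blockedUsable(16)) layouts.push_back(Layout::Blocked16);
    layouts.push_back(Layout::Planar);  // planar always works as a fallback
    return layouts;
}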
reshapeAndSetPermOrder(orderShiftForDims, orderShiftForBlocks, firstSpatialOrder, srcBlockedDims); - } else if (getParentEdgeAt(0)->getMemory().GetDesc().isTailCFormat()) { + } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { srcDims.push_back(srcDims[1]); dstDims.push_back(dstDims[1]); srcDims.erase(srcDims.begin() + 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp index 0a2f4fc8140..1d44dd3f747 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -116,13 +115,13 @@ void MKLDNNDetectionOutputNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -132,12 +131,12 @@ void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) { const float *loc_data = reinterpret_cast(getParentEdgeAt(idx_location)->getMemoryPtr()->GetPtr()); const float *conf_data = reinterpret_cast(getParentEdgeAt(idx_confidence)->getMemoryPtr()->GetPtr()); const float *prior_data = reinterpret_cast(getParentEdgeAt(idx_priors)->getMemoryPtr()->GetPtr()); - const float *arm_conf_data = inDims.size() > 3 ? + const float *arm_conf_data = inputShapes.size() > 3 ? reinterpret_cast(getParentEdgeAt(idx_arm_confidence)->getMemoryPtr()->GetPtr()) : nullptr; - const float *arm_loc_data = inDims.size() > 4 ? + const float *arm_loc_data = inputShapes.size() > 4 ? 
reinterpret_cast(getParentEdgeAt(idx_arm_location)->getMemoryPtr()->GetPtr()) : nullptr; - const int N = getParentEdgeAt(idx_confidence)->getDims()[0]; + const int N = getParentEdgeAt(idx_confidence)->getShape().getStaticDims()[0]; float *decoded_bboxes_data = _decoded_bboxes.data(); float *reordered_conf_data = _reordered_conf.data(); @@ -286,8 +285,8 @@ void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) { } } - const int num_results = getChildEdgesAtPort(0)[0]->getDims()[2]; - const int DETECTION_SIZE = getChildEdgesAtPort(0)[0]->getDims()[3]; + const int num_results = getChildEdgesAtPort(0)[0]->getShape().getStaticDims()[2]; + const int DETECTION_SIZE = getChildEdgesAtPort(0)[0]->getShape().getStaticDims()[3]; if (DETECTION_SIZE != 7) { IE_THROW() << NOT_IMPLEMENTED; } @@ -300,7 +299,7 @@ void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) { else dst_data_size = N * _num_classes * _num_priors * DETECTION_SIZE * sizeof(float); - if (dst_data_size > getChildEdgesAtPort(0)[0]->getBlob()->byteSize()) { + if (dst_data_size > getChildEdgesAtPort(0)[0]->getMemory().GetSize()) { IE_THROW() << OUT_OF_BOUNDS; } memset(dst_data, 0, dst_data_size); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp index b9ef511d010..1796d49989e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp @@ -48,20 +48,20 @@ MKLDNNDFTNode::MKLDNNDFTNode(const std::shared_ptr& op, const mkld } /* Data */ - inputShape = inDims[DATA_INDEX].ToSizeVector(); + inputShape = inputShapes[DATA_INDEX].getStaticDims(); if (inputShape.size() < 2) { IE_THROW() << layerErrorPrefix << " has invalid 'data' input tensor with rank: " << inputShape.size(); } /* Axes */ - const auto axesRank = inDims[AXES_INDEX].ndims(); + const auto axesRank = inputShapes[AXES_INDEX].getRank(); if (axesRank != 1) { IE_THROW() << layerErrorPrefix << " has invalid 'axes' input tensor with rank: " << axesRank; } /* Signal size */ if (inputsNumber > SIGNAL_SIZE_INDEX) { - const auto signalSizeRank = inDims[SIGNAL_SIZE_INDEX].ndims(); + const auto signalSizeRank = inputShapes[SIGNAL_SIZE_INDEX].getRank(); if (signalSizeRank != 1) { IE_THROW() << layerErrorPrefix << " has invalid 'signal_size' input tensor with rank: " << signalSizeRank; } @@ -93,12 +93,12 @@ void MKLDNNDFTNode::initSupportedPrimitiveDescriptors() { } } - std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}); + std::vector inDataConfigurators({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}}); if (getOriginalInputsNumber() > SIGNAL_SIZE_INDEX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32}); + inDataConfigurators.push_back({LayoutType::ncsp, Precision::I32}); - addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, impl_desc_type::ref_any); + addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } namespace { @@ -225,7 +225,7 @@ void copyDataToOutputWithSignalSize(const float* input, const std::vector(axesEdge->getMemoryPtr()->GetPtr()); - axes = std::vector(axesStartPtr, axesStartPtr + axesEdge->getDims()[0]); + axes = std::vector(axesStartPtr, axesStartPtr + axesEdge->getShape().getStaticDims()[0]); for (auto& axis : axes) { if (axis < 0) { axis += inputShape.size() - 1; @@ -233,7 
+233,7 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { } std::sort(axes.begin(), axes.end()); - outputShape = getChildEdgeAt(0)->getDims().ToSizeVector(); + outputShape = getChildEdgeAt(0)->getShape().getStaticDims(); for (size_t axis : axes) { size_t nComplex = outputShape[axis]; // FFT uses different twiddle factors @@ -247,8 +247,8 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { const auto *input = reinterpret_cast(inputDataEdge->getMemoryPtr()->GetPtr()); auto *output = reinterpret_cast(outputDataEdge->getMemoryPtr()->GetPtr()); - auto inputStrides = inputDataEdge->getDesc().getBlockingDesc().getStrides(); - auto outputStrides = outputDataEdge->getDesc().getBlockingDesc().getStrides(); + auto inputStrides = inputDataEdge->getMemory().GetDescWithType().getStrides(); + auto outputStrides = outputDataEdge->getMemory().GetDescWithType().getStrides(); if (inputShape != outputShape) { copyDataToOutputWithSignalSize(input, inputShape, inputStrides, output, outputShape, outputStrides); } else { @@ -257,7 +257,7 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { } // 1d case - if (inputDataEdge->getDesc().getDims().size() == 2) { + if (inputDataEdge->getShape().getRank() == 2) { size_t nComplex = outputShape[0]; if (IsPowerOfTwo(nComplex)) { fft(output, nComplex * 2, true); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp index 9dd250d7b96..d777e22210f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp @@ -125,11 +125,11 @@ struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, pu if (eltwiseNode.getFusedWith()[i].get()->getType() == Eltwise) { post_op_emitters.push_back(create_eltwise_emitter(*eltwiseNode.getFusedWith()[i].get(), exec_prc)); } else if (eltwiseNode.getFusedWith()[i].get()->getType() == FakeQuantize) { - auto fakeQuantizeNode = dynamic_cast(eltwiseNode.getFusedWith()[i].get()); - fakeQuantizeNode->appendPostOps(post_ops); + auto fakeQuantizeNode = dynamic_cast(eltwiseNode.getFusedWith()[i].get()); + fakeQuantizeNode->appendPostOps(post_ops); - quantization_injectors.push_back(std::make_shared>( - this, post_ops.get()->entry_[post_ops.len() - 1], vmm_d_weights, vmm_d_bias, reg_d_weights, reg_d_bias)); + quantization_injectors.push_back(std::make_shared>( + this, post_ops.get()->entry_[post_ops.len() - 1], vmm_d_weights, vmm_d_bias, reg_d_weights, reg_d_bias)); } } @@ -965,9 +965,9 @@ size_t MKLDNNEltwiseNode::getOpInputsNum() const { } bool MKLDNNEltwiseNode::isWithBroadcast() { - auto oDims = outDims[0].ToSizeVector(); - for (size_t i = 0; i < inDims.size(); i++) { - auto iDims = inDims[i].ToSizeVector(); + auto oDims = outputShapes[0].getStaticDims(); + for (size_t i = 0; i < inputShapes.size(); i++) { + auto iDims = inputShapes[i].getStaticDims(); if (iDims != oDims) return true; } @@ -1080,10 +1080,10 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { Blocked }; - auto initDesc = [&] (LayoutType lt) -> PrimitiveDescInfo { - auto createMemoryDesc = [lt](MKLDNNEdgePtr edge, Precision prc, size_t offset) -> TensorDesc { - if (lt == ChannelsFirst && edge->getDims().ndims() != 1) { - auto dims = edge->getDims().ToSizeVector(); + auto initDesc = [&] (LayoutType lt) -> NodeDesc { + auto createMemoryDesc = [lt](MKLDNNEdgePtr edge, Precision prc, size_t offset) -> std::unique_ptr { + if (lt == ChannelsFirst && edge->getShape().getRank() != 1) 
{ + auto dims = edge->getShape().getStaticDims(); auto ndims = dims.size(); std::vector order(ndims); std::iota(order.begin(), order.end(), 0); @@ -1097,11 +1097,11 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { blocks[i] = dims[order[i]]; } - return TensorDesc(prc, edge->getDims().ToSizeVector(), {blocks, order, offset}); - } else if (lt == Blocked && edge->getDims().ndims() != 1 && edge->getDims()[1] != 1) { + return MKLDNNPlugin::make_unique(prc, edge->getShape().getStaticDims(), blocks, order, offset); + } else if (lt == Blocked && edge->getShape().getRank() != 1 && edge->getShape().getStaticDims()[1] != 1) { size_t blockSize = mayiuse(x64::avx512_common) ? 16 : 8; - std::vector blocks = edge->getDims().ToSizeVector(); + std::vector blocks = edge->getShape().getStaticDims(); std::vector order(blocks.size()); std::iota(order.begin(), order.end(), 0); @@ -1109,37 +1109,38 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { blocks.push_back(blockSize); order.push_back(1); - return TensorDesc(prc, edge->getDims().ToSizeVector(), {blocks, order, offset}); + return MKLDNNPlugin::make_unique(prc, edge->getShape().getStaticDims(), blocks, order, offset); } else { - std::vector blocks = edge->getDims().ToSizeVector(); + std::vector blocks = edge->getShape().getStaticDims(); std::vector order(blocks.size()); std::iota(order.begin(), order.end(), 0); - return TensorDesc(prc, edge->getDims().ToSizeVector(), {blocks, order, offset}); + return MKLDNNPlugin::make_unique(prc, edge->getShape().getStaticDims(), blocks, order, offset); } }; size_t offset = std::numeric_limits::max(); - InferenceEngine::LayerConfig config; - config.dynBatchSupport = getChildEdgeAt(0)->getDims().ndims() > 1 && getChildEdgeAt(0)->getDims() == getParentEdgeAt(0)->getDims(); + NodeConfig config; + config.dynBatchSupport = getChildEdgeAt(0)->getShape().getRank() > 1 && getChildEdgeAt(0)->getShape() == + getParentEdgeAt(0)->getShape(); for (size_t i = 0; i < getParentEdges().size(); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = (!i && canBeInPlace() && inputPrecisions[i] == outputPrecision) ? 0 : -1; - dataConfig.constant = false; + PortConfig portConfig; + portConfig.inPlace = (!i && canBeInPlace() && inputPrecisions[i] == outputPrecision) ? 
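// The eltwise createMemoryDesc lambda above derives a blocked layout description from a plain
// dims vector: the channels-first branch moves the channel dimension to the innermost position
// of the order, the blocked branch additionally appends a channel block. A self-contained sketch
// of the channels-first case; BlockedLayout and channelsFirstLayout are illustrative stand-ins,
// not the plugin's descriptor types.
#include <cstddef>
#include <numeric>
#include <vector>

struct BlockedLayout {
    std::vector<size_t> blockDims;  // dims listed in memory (outer-to-inner) order
    std::vector<size_t> order;      // order[i] = logical dim stored at position i
};

BlockedLayout channelsFirstLayout(const std::vector<size_t>& dims) {
    const size_t ndims = dims.size();
    std::vector<size_t> order(ndims);
    std::iota(order.begin(), order.end(), 0);
    if (ndims > 1) {
        // move the channel dimension (logical index 1) to the innermost position
        order.erase(order.begin() + 1);
        order.push_back(1);
    }
    std::vector<size_t> blocks(ndims);
    for (size_t i = 0; i < ndims; ++i)
        blocks[i] = dims[order[i]];
    return {blocks, order};
}
// For NCHW dims {N, C, H, W} this produces order {0, 2, 3, 1} and blockDims {N, H, W, C},
// i.e. an NHWC (channels-last in memory) description.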
0 : -1; + portConfig.constant = false; - dataConfig.desc = createMemoryDesc(getParentEdgeAt(i), inputPrecisions[i], offset); + portConfig.desc = createMemoryDesc(getParentEdgeAt(i), inputPrecisions[i], offset); - config.inConfs.push_back(dataConfig); + config.inConfs.push_back(portConfig); } - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; + PortConfig portConfig; + portConfig.inPlace = -1; + portConfig.constant = false; - dataConfig.desc = createMemoryDesc(getChildEdgeAt(0), outputPrecision, offset); + portConfig.desc = createMemoryDesc(getChildEdgeAt(0), outputPrecision, offset); - config.outConfs.push_back(dataConfig); + config.outConfs.push_back(portConfig); impl_desc_type impl_type; if (mayiuse(x64::avx512_common)) { @@ -1155,18 +1156,20 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { return {config, impl_type}; }; - bool isChannelsFirstApplicable = one_of(getChildEdgeAt(0)->getDims().ndims(), 1, 2, 4, 5); + bool isChannelsFirstApplicable = one_of(getChildEdgeAt(0)->getShape().getRank(), 1, 2, 4, 5); for (size_t i = 0; i < getParentEdges().size(); i++) { - isChannelsFirstApplicable = isChannelsFirstApplicable && one_of(getParentEdgeAt(i)->getDims().ndims(), 1, 2, 4, 5); - isChannelsFirstApplicable = isChannelsFirstApplicable && implication(getParentEdgeAt(i)->getDims().ndims() != 1, - getChildEdgeAt(0)->getDims().ndims() == getParentEdgeAt(i)->getDims().ndims()); + isChannelsFirstApplicable = isChannelsFirstApplicable && one_of(getParentEdgeAt(i)->getShape().getRank(), 1, 2, 4, 5); + isChannelsFirstApplicable = isChannelsFirstApplicable && implication(getParentEdgeAt(i)->getShape().getRank() != 1, + getChildEdgeAt(0)->getShape().getRank() == + getParentEdgeAt(i)->getShape().getRank()); } - bool isBlockedApplicable = one_of(getChildEdgeAt(0)->getDims().ndims(), 1, 4, 5); + bool isBlockedApplicable = one_of(getChildEdgeAt(0)->getShape().getRank(), 1, 4, 5); for (size_t i = 0; i < getParentEdges().size(); i++) { - isBlockedApplicable = isBlockedApplicable && one_of(getParentEdgeAt(i)->getDims().ndims(), 1, 4, 5); - isBlockedApplicable = isBlockedApplicable && implication(getParentEdgeAt(i)->getDims().ndims() != 1, - getChildEdgeAt(0)->getDims().ndims() == getParentEdgeAt(i)->getDims().ndims()); + isBlockedApplicable = isBlockedApplicable && one_of(getParentEdgeAt(i)->getShape().getRank(), 1, 4, 5); + isBlockedApplicable = isBlockedApplicable && implication(getParentEdgeAt(i)->getShape().getRank() != 1, + getChildEdgeAt(0)->getShape().getRank() == + getParentEdgeAt(i)->getShape().getRank()); } if (isChannelsFirstApplicable) @@ -1177,9 +1180,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { } void MKLDNNEltwiseNode::createPrimitive() { - auto config = getSelectedPrimitiveDescriptor()->getConfig(); - - auto initDims = [this, config](size_t maxInputSize) { + auto initDims = [this](size_t maxInputSize) { size_t inputNum = getParentEdges().size(); dims_in.resize(inputNum); @@ -1189,8 +1190,9 @@ void MKLDNNEltwiseNode::createPrimitive() { dims_out.resize(maxInputSize, 1); + auto outBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); std::vector order(maxInputSize); - auto outOrder = config.outConfs[0].desc.getBlockingDesc().getOrder(); + auto outOrder = outBlockingDesc.getOrder(); for (size_t i = 0; i < order.size(); i++) { if (i < order.size() - outOrder.size()) order[i] = i; @@ -1198,17 +1200,18 @@ void MKLDNNEltwiseNode::createPrimitive() { order[i] = outOrder[i - (order.size() - 
outOrder.size())] + (order.size() - outOrder.size()); } - size_t outRank = config.outConfs[0].desc.getBlockingDesc().getBlockDims().size(); + size_t outRank = outBlockingDesc.getBlockDims().size(); for (int i = 0; i < outRank; i++) { - dims_out[dims_out.size() - 1 - i] = config.outConfs[0].desc.getBlockingDesc().getBlockDims()[outRank - 1 - i]; + dims_out[dims_out.size() - 1 - i] = outBlockingDesc.getBlockDims()[outRank - 1 - i]; } for (int i = 0; i < inputNum; i++) { - size_t inRank = config.inConfs[i].desc.getBlockingDesc().getBlockDims().size(); + auto inBlockingDesc = getParentEdgeAt(i)->getMemory().GetDescWithType(); + size_t inRank = inBlockingDesc.getBlockDims().size(); // WA to normalize blocked and planar layouts - auto inOrder = config.inConfs[i].desc.getBlockingDesc().getOrder(); - size_t startOff = outOrder.size() != config.outConfs[0].desc.getDims().size() && + auto inOrder = inBlockingDesc.getOrder(); + size_t startOff = outOrder.size() != outBlockingDesc.getShape().getRank() && outOrder[outOrder.size() - 1] != inOrder[inOrder.size() - 1] ? 1 : 0; // WA to handle nspc layout with 1D tensors @@ -1217,7 +1220,7 @@ void MKLDNNEltwiseNode::createPrimitive() { } for (int j = 0; j < inRank; j++) { - dims_in[i][dims_in[i].size() - 1 - j - startOff] = config.inConfs[i].desc.getBlockingDesc().getBlockDims()[inRank - 1 - j]; + dims_in[i][dims_in[i].size() - 1 - j - startOff] = inBlockingDesc.getBlockDims()[inRank - 1 - j]; } } @@ -1229,13 +1232,13 @@ void MKLDNNEltwiseNode::createPrimitive() { } }; - auto initOffsets = [this, config](size_t maxInputSize) { + auto initOffsets = [this](size_t maxInputSize) { size_t inputNum = getParentEdges().size(); offsets_out.resize(maxInputSize, 1); offset_out_calc(offsets_out, dims_out); for (int j = 0; j < maxInputSize; j++) { - offsets_out[j] *= config.outConfs[0].desc.getPrecision().size(); + offsets_out[j] *= getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().size(); } offsets_in.resize(inputNum); @@ -1243,7 +1246,7 @@ void MKLDNNEltwiseNode::createPrimitive() { offsets_in[i].resize(maxInputSize, 1); offset_in_calc(offsets_in[i], dims_in[i], dims_out); for (int j = 0; j < maxInputSize; j++) { - offsets_in[i][j] *= config.inConfs[i].desc.getPrecision().size(); + offsets_in[i][j] *= getParentEdgeAt(i)->getMemory().GetDesc().getPrecision().size(); } } @@ -1287,10 +1290,11 @@ void MKLDNNEltwiseNode::createPrimitive() { } }; - tensorRank = std::max(static_cast(optimalTensorRank), config.outConfs[0].desc.getBlockingDesc().getBlockDims().size()); + auto outBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + tensorRank = std::max(static_cast(optimalTensorRank), outBlockingDesc.getBlockDims().size()); initDims(tensorRank); - auto outOrder = config.outConfs[0].desc.getBlockingDesc().getOrder(); + auto outOrder = outBlockingDesc.getOrder(); size_t oc_size = 0; offsets_oc.resize(tensorRank, 0); if (isFusedWith(FakeQuantize)) { @@ -1310,7 +1314,7 @@ void MKLDNNEltwiseNode::createPrimitive() { fullWorkAmount *= dims_out[i]; } - isDynBatchEnabled = config.dynBatchSupport; + isDynBatchEnabled = getSelectedPrimitiveDescriptor()->getConfig().dynBatchSupport; size_t minimalConcurrency = parallel_get_max_threads(); size_t minimalJitWorkAmount = 256; @@ -1320,7 +1324,7 @@ void MKLDNNEltwiseNode::createPrimitive() { bool hasDifferentDims = false; while (currentJitWorkAmount < minimalJitWorkAmount && currentJitWorkAmount < fullWorkAmount && // we shouldn't collapse batch dimension in case dynamic batch is enabled - (!isDynBatchEnabled || 
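// initOffsets above converts per-dimension element offsets into byte offsets by multiplying
// them with the precision size taken from the memory descriptor. A minimal sketch of that
// computation for a dense row-major tensor (an assumption; the real kernel also handles
// collapsed and broadcast dimensions).
#include <cstddef>
#include <vector>

std::vector<size_t> byteOffsets(const std::vector<size_t>& dims, size_t elementSize) {
    std::vector<size_t> offsets(dims.size(), 1);
    // row-major element strides: the innermost dimension changes fastest
    for (int i = static_cast<int>(dims.size()) - 2; i >= 0; --i)
        offsets[i] = offsets[i + 1] * dims[i + 1];
    for (auto& o : offsets)
        o *= elementSize;  // element stride -> byte stride
    return offsets;
}
// dims {2, 3, 4} with 4-byte elements gives element strides {12, 4, 1} and byte strides {48, 16, 4}.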
(config.outConfs[0].desc.getBlockingDesc().getBlockDims().size() - collapsedDims > 2))) { + (!isDynBatchEnabled || (outBlockingDesc.getBlockDims().size() - collapsedDims > 2))) { if (dims_out.size() - collapsedDims - 2 < 0) break; @@ -1372,22 +1376,24 @@ void MKLDNNEltwiseNode::createPrimitive() { } } - batchDimIdx = tensorRank - config.outConfs[0].desc.getBlockingDesc().getBlockDims().size() + collapsedDims; + batchDimIdx = tensorRank - outBlockingDesc.getBlockDims().size() + collapsedDims; schedulerWorkAmount = fullWorkAmount / dims_out[dims_out.size() - 1]; initOffsets(tensorRank); - jep.inputs_number = config.inConfs.size(); + const size_t inpuPortsCount = getSelectedPrimitiveDescriptor()->getConfig().inConfs.size(); + + jep.inputs_number = inpuPortsCount; jep.input_size = tensorRank; - for (int i = 0; i < config.inConfs.size(); i++) { + for (int i = 0; i < inpuPortsCount; i++) { jep.src_size[i] = dims_in[i][dims_in[i].size() - 1]; - jep.src_prc[i] = config.inConfs[i].desc.getPrecision(); + jep.src_prc[i] = getParentEdgesAtPort(i).front()->getMemory().GetDesc().getPrecision(); } jep.dst_size = dims_out[dims_out.size() - 1]; - jep.dst_prc = config.outConfs[0].desc.getPrecision(); + jep.dst_prc = getChildEdgesAtPort(0).front()->getMemory().GetDesc().getPrecision(); - for (int i = 0; i < config.inConfs.size(); i++) { + for (int i = 0; i < inpuPortsCount; i++) { jep.src_offsets[i] = offsets_in[i]; } jep.dst_offsets = offsets_out; @@ -1415,13 +1421,13 @@ void MKLDNNEltwiseNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; auto config = selected_pd->getConfig(); - if (!isInitConfig(config)) { + if (!isConfigDefined(config)) { for (size_t i = 0; i < config.inConfs.size(); i++) { - config.inConfs[i].desc = getConfiguredInputDesc(config, i); + config.inConfs[i].desc = std::move(getDefinedInputDesc(config, i)); } for (size_t i = 0; i < config.outConfs.size(); i++) { - config.outConfs[i].desc = getConfiguredOutputDesc(config, i); + config.outConfs[i].desc = std::move(getDefinedOutputDesc(config, i)); } initDescriptor(config); @@ -1641,13 +1647,13 @@ bool MKLDNNEltwiseNode::canBeInPlace() const { } } - return getParentEdgesAtPort(0)[0].get()->getDims() == getChildEdgesAtPort(0)[0].get()->getDims(); + return getParentEdgesAtPort(0)[0].get()->getShape() == getChildEdgesAtPort(0)[0].get()->getShape(); } void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) { // Handling Convolution custom Add node fusing case which is processed via dnnl append_sum() API. 
specialConvolutionAddFusing = (parentNode->getType() == Convolution || parentNode->getType() == BinaryConvolution) && getAlgorithm() == EltwiseAdd && - getParentEdgesAtPort(0)[0]->getDims().ToSizeVector() == getParentEdgesAtPort(1)[0]->getDims().ToSizeVector(); + getParentEdgesAtPort(0)[0]->getShape() == getParentEdgesAtPort(1)[0]->getShape(); if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) { fillScalesAndShifts(parentNode.get(), scales, shifts, 16); } @@ -1770,7 +1776,7 @@ InferenceEngine::Precision MKLDNNEltwiseNode::getRuntimePrecision() const { } } - return MKLDNNExtensionUtils::getMaxPrecision(inputPrecisions); + return getMaxPrecision(inputPrecisions); } REG_MKLDNN_PRIM_FOR(MKLDNNEltwiseNode, Eltwise); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp index f59b69b023d..4499e91dacb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp @@ -62,15 +62,15 @@ void MKLDNNEmbeddingBagOffsetSumNode::initSupportedPrimitiveDescriptors() { IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); } - std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}); + std::vector inDataConfigurators({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}); if (getOriginalInputsNumber() > DEFAULT_INDEX_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32}); + inDataConfigurators.push_back({LayoutType::ncsp, Precision::I32}); if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, inDataPrecision}); + inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); - addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, impl_desc_type::ref_any); + addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } void MKLDNNEmbeddingBagOffsetSumNode::initFromInputs() { @@ -122,7 +122,8 @@ void MKLDNNEmbeddingBagOffsetSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getDesc(), getChildEdgeAt(0)->getDesc()); + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); } bool MKLDNNEmbeddingBagOffsetSumNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp index 3318e1089fa..f185d085881 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp @@ -58,12 +58,12 @@ void MKLDNNEmbeddingBagPackedSumNode::initSupportedPrimitiveDescriptors() { IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); } 
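// The embedding-bag hunks above assemble the node's input configuration as a vector of
// {layout, precision} pairs and append optional entries only when the node actually has the
// default-index or per-sample-weights inputs. A stand-alone sketch of that pattern; LayoutKind,
// PrecisionKind, PortConf and buildInputConf are illustrative stand-ins, not the plugin's
// configurator types.
#include <cstddef>
#include <vector>

enum class LayoutKind { Planar };
enum class PrecisionKind { FP32, I32 };

struct PortConf {
    LayoutKind layout;
    PrecisionKind precision;
};

std::vector<PortConf> buildInputConf(size_t inputCount, PrecisionKind dataPrecision,
                                     size_t defaultIndexIdx, size_t perSampleWeightsIdx) {
    std::vector<PortConf> conf{{LayoutKind::Planar, dataPrecision},
                               {LayoutKind::Planar, PrecisionKind::I32},
                               {LayoutKind::Planar, PrecisionKind::I32}};
    if (inputCount > defaultIndexIdx)
        conf.push_back({LayoutKind::Planar, PrecisionKind::I32});
    if (inputCount > perSampleWeightsIdx)
        conf.push_back({LayoutKind::Planar, dataPrecision});
    return conf;
}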
- std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}); + std::vector inDataConfigurators({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}}); if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, inDataPrecision}); + inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); - addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, impl_desc_type::ref_any); + addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } void MKLDNNEmbeddingBagPackedSumNode::initFromInputs() { @@ -89,7 +89,8 @@ void MKLDNNEmbeddingBagPackedSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getDesc(), getChildEdgeAt(0)->getDesc()); + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); } bool MKLDNNEmbeddingBagPackedSumNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp index 8abeee76d76..853da79accf 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.cpp @@ -44,13 +44,12 @@ MKLDNNEmbeddingBagSumNode::MKLDNNEmbeddingBagSumNode( template void MKLDNNEmbeddingBagSumNode::processData(const T* srcData, const T* weightsData, T* dstData, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc) { + const InferenceEngine::SizeVector& inDataDims, const InferenceEngine::SizeVector& outDataDims) { std::string msgPrefix = std::string("Node EmbeddingBagSum with name '") + _layerName + "' "; initFromInputs(); - const auto& inDataDims = srcDesc.getDims(); - const size_t outputBagsNum = dstDesc.getDims()[0]; + const size_t outputBagsNum = outDataDims[0]; auto threadBody = [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); @@ -115,27 +114,27 @@ void MKLDNNEmbeddingBagSumNode::processData(const T* srcData, const T* weightsDa parallel_nt(0, threadBody); } -void MKLDNNEmbeddingBagSumNode::execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc) { - switch (srcDesc.getPrecision()) { +void MKLDNNEmbeddingBagSumNode::execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, const InferenceEngine::Precision &srcPrc, + const InferenceEngine::SizeVector& inDims, const InferenceEngine::SizeVector& outDims) { + switch (srcPrc) { case Precision::FP32: { return processData::value_type>(reinterpret_cast(srcData), - reinterpret_cast(weightsData), reinterpret_cast(dstData), srcDesc, dstDesc); + reinterpret_cast(weightsData), reinterpret_cast(dstData), inDims, outDims); } case Precision::I8: { return processData::value_type>(reinterpret_cast(srcData), - reinterpret_cast(weightsData), reinterpret_cast(dstData), srcDesc, dstDesc); + reinterpret_cast(weightsData), reinterpret_cast(dstData), inDims, outDims); 
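// EmbeddingBagSum::execute above now dispatches on the source precision and forwards plain dims
// vectors into the templated processData<T>() instead of passing tensor descriptors. A reduced,
// self-contained sketch of that dispatch; Prec, processDataSketch and executeSketch are
// illustrative names, and the type list and loop body are placeholders rather than the node's
// real reduction logic.
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

enum class Prec { FP32, I32, U8 };

template <typename T>
void processDataSketch(const uint8_t* src, uint8_t* dst, const std::vector<size_t>& dims) {
    const T* typedSrc = reinterpret_cast<const T*>(src);
    T* typedDst = reinterpret_cast<T*>(dst);
    size_t total = 1;
    for (size_t d : dims) total *= d;
    for (size_t i = 0; i < total; ++i) typedDst[i] = typedSrc[i];  // placeholder element-wise body
}

void executeSketch(Prec prc, const uint8_t* src, uint8_t* dst, const std::vector<size_t>& dims) {
    switch (prc) {
        case Prec::FP32: return processDataSketch<float>(src, dst, dims);
        case Prec::I32:  return processDataSketch<int32_t>(src, dst, dims);
        case Prec::U8:   return processDataSketch<uint8_t>(src, dst, dims);
        default: throw std::runtime_error("unsupported precision");
    }
}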
} case Precision::U8: { - return processData::value_type>(srcData, weightsData, dstData, srcDesc, dstDesc); + return processData::value_type>(srcData, weightsData, dstData, inDims, outDims); } case Precision::I32: { return processData::value_type>(reinterpret_cast(srcData), - reinterpret_cast(weightsData), reinterpret_cast(dstData), srcDesc, dstDesc); + reinterpret_cast(weightsData), reinterpret_cast(dstData), inDims, outDims); } default: { IE_THROW() << "EmbeddingBagSum layer does not support precision '" - + std::string(srcDesc.getPrecision().name()) + "'"; + + std::string(srcPrc.name()) + "'"; } } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h index f3513501b5c..ef5e7ed9a2f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_sum_node.h @@ -21,8 +21,8 @@ public: size_t perSampleWeightsIdx, size_t defaultIndexIdx); - void execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + void execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, const InferenceEngine::Precision &srcPrc, + const InferenceEngine::SizeVector& inDims, const InferenceEngine::SizeVector& outDims); ~MKLDNNEmbeddingBagSumNode() = default; @@ -37,7 +37,7 @@ protected: template void processData(const T* srcData, const T* weightsData, T* dstData, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + const InferenceEngine::SizeVector& inDataDims, const InferenceEngine::SizeVector& outDataDims); const size_t EMB_TABLE_IDX = 0lu; const size_t INDICES_IDX; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp index 82eae04dcc2..1cea74dc5fb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp @@ -62,21 +62,21 @@ void MKLDNNEmbeddingSegmentsSumNode::initSupportedPrimitiveDescriptors() { IE_THROW() << logPrefix << "has unsupported precision: " << inDataPrecision.name(); } - std::vector inDataConfigurators({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}); + std::vector inDataConfigurators({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}); if (getOriginalInputsNumber() > DEFAULT_INDEX_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, Precision::I32}); + inDataConfigurators.push_back({LayoutType::ncsp, Precision::I32}); if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) - inDataConfigurators.push_back({TensorDescCreatorTypes::ncsp, inDataPrecision}); + inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); - addSupportedPrimDesc(inDataConfigurators, {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, impl_desc_type::ref_any); + addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } void MKLDNNEmbeddingSegmentsSumNode::initFromInputs() { indices_ = 
reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); - indicesSize_ = getParentEdgeAt(INDICES_IDX)->getBlob()->size(); + indicesSize_ = getParentEdgeAt(INDICES_IDX)->getShape().getElementsCount(); segmentIds_ = reinterpret_cast(getParentEdgeAt(SEGMENT_ID_IDX)->getMemoryPtr()->GetPtr()); @@ -124,7 +124,8 @@ void MKLDNNEmbeddingSegmentsSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getDesc(), getChildEdgeAt(0)->getDesc()); + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); } bool MKLDNNEmbeddingSegmentsSumNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp index fe2362003f3..d04b80b0086 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -253,22 +252,22 @@ void MKLDNNExperimentalDetectronDetectionOutputNode::initSupportedPrimitiveDescr if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronDetectionOutputNode::execute(mkldnn::stream strm) { - const int rois_num = getParentEdgeAt(INPUT_ROIS)->getDims()[0]; - assert(classes_num_ == static_cast(getParentEdgeAt(INPUT_SCORES)->getDims()[1])); - assert(4 * classes_num_ == static_cast(getParentEdgeAt(INPUT_DELTAS)->getDims()[1])); + const int rois_num = getParentEdgeAt(INPUT_ROIS)->getShape().getStaticDims()[0]; + assert(classes_num_ == static_cast(getParentEdgeAt(INPUT_SCORES)->getShape().getStaticDims()[1])); + assert(4 * classes_num_ == static_cast(getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[1])); const auto* boxes = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); const auto* deltas = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp index 255f8443765..8bd70dd2a6e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -313,36 +311,36 @@ void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::initSupportedP if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::execute(mkldnn::stream strm) { try { - if (inDims.size() != 4 || outDims.size() != 2) { + if (inputShapes.size() != 4 || outputShapes.size() != 2) { IE_THROW() << "Incorrect number of input or output edges!"; } size_t anchor_dims_size = 1; - for (size_t i = 0; i < getParentEdgeAt(INPUT_ANCHORS)->getDims().ToSizeVector().size(); i++) { - anchor_dims_size *= getParentEdgeAt(INPUT_ANCHORS)->getDims().ToSizeVector()[i]; + for (size_t i = 0; i < getParentEdgeAt(INPUT_ANCHORS)->getShape().getRank(); i++) { + anchor_dims_size *= getParentEdgeAt(INPUT_ANCHORS)->getShape().getStaticDims()[i]; } size_t deltas_dims_size = 1; - for (size_t i = 0; i < getParentEdgeAt(INPUT_DELTAS)->getDims().ToSizeVector().size(); i++) { - deltas_dims_size *= getParentEdgeAt(INPUT_DELTAS)->getDims().ToSizeVector()[i]; + for (size_t i = 0; i < getParentEdgeAt(INPUT_DELTAS)->getShape().getRank(); i++) { + deltas_dims_size *= getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[i]; } if (anchor_dims_size != deltas_dims_size) IE_THROW() << "'Anchors' blob size for ONNXProposal is incompatible with 'deltas' blob size!"; size_t score_dims_size = 1; - for (size_t i = 0; i < getParentEdgeAt(INPUT_SCORES)->getDims().ToSizeVector().size(); i++) { - score_dims_size *= getParentEdgeAt(INPUT_SCORES)->getDims().ToSizeVector()[i]; + for (size_t i = 0; i < getParentEdgeAt(INPUT_SCORES)->getShape().getRank(); i++) { + score_dims_size *= getParentEdgeAt(INPUT_SCORES)->getShape().getStaticDims()[i]; } if (deltas_dims_size != (4 * score_dims_size)) IE_THROW() << "'Deltas' blob size for ONNXProposal is incompatible with 'scores' blob size!"; @@ -356,11 +354,11 @@ void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::execute(mkldnn float *p_roi_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); float *p_roi_score_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->GetPtr()); - const int anchors_num = getParentEdgeAt(INPUT_SCORES)->getDims()[0]; + const int anchors_num = getParentEdgeAt(INPUT_SCORES)->getShape().getStaticDims()[0]; // bottom shape: (num_anchors) x H x W - const int bottom_H = getParentEdgeAt(INPUT_DELTAS)->getDims()[1]; - const int bottom_W = getParentEdgeAt(INPUT_DELTAS)->getDims()[2]; + const int bottom_H = getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[1]; + const int bottom_W = getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[2]; // input image height & width const float img_H = 
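// The proposal-generation hunk above now derives flat element counts by multiplying the static
// dims of each input shape and cross-checks them (anchors vs. deltas vs. scores). A small
// stand-alone sketch of that consistency check; elementCount and checkProposalInputSizes are
// illustrative helpers, not the node's actual functions.
#include <cstddef>
#include <stdexcept>
#include <vector>

size_t elementCount(const std::vector<size_t>& dims) {
    size_t total = 1;
    for (size_t d : dims) total *= d;
    return total;
}

void checkProposalInputSizes(const std::vector<size_t>& anchorDims,
                             const std::vector<size_t>& deltaDims,
                             const std::vector<size_t>& scoreDims) {
    if (elementCount(anchorDims) != elementCount(deltaDims))
        throw std::runtime_error("'anchors' blob size is incompatible with 'deltas' blob size");
    if (elementCount(deltaDims) != 4 * elementCount(scoreDims))
        throw std::runtime_error("'deltas' blob size is incompatible with 'scores' blob size");
}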
p_img_info_cpu[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp index b5d073a0b35..001257c443d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -55,22 +53,22 @@ void MKLDNNExperimentalDetectronPriorGridGeneratorNode::initSupportedPrimitiveDe if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronPriorGridGeneratorNode::execute(mkldnn::stream strm) { - const int num_priors_ = getParentEdgeAt(INPUT_PRIORS)->getDims()[0]; - assert(getParentEdgeAt(INPUT_PRIORS)->getDims()[1] == 4); + const int num_priors_ = getParentEdgeAt(INPUT_PRIORS)->getShape().getStaticDims()[0]; + assert(getParentEdgeAt(INPUT_PRIORS)->getShape().getStaticDims()[1] == 4); // Execute - const int layer_width = grid_w_ ? grid_w_ : getParentEdgeAt(INPUT_FEATUREMAP)->getDims()[3]; - const int layer_height = grid_h_ ? grid_h_ : getParentEdgeAt(INPUT_FEATUREMAP)->getDims()[2]; - const float step_w = stride_w_ ? stride_w_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getDims()[3]) / layer_width; - const float step_h = stride_h_ ? stride_h_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getDims()[2]) / layer_height; + const int layer_width = grid_w_ ? grid_w_ : getParentEdgeAt(INPUT_FEATUREMAP)->getShape().getStaticDims()[3]; + const int layer_height = grid_h_ ? grid_h_ : getParentEdgeAt(INPUT_FEATUREMAP)->getShape().getStaticDims()[2]; + const float step_w = stride_w_ ? stride_w_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getShape().getStaticDims()[3]) / layer_width; + const float step_h = stride_h_ ? 
stride_h_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getShape().getStaticDims()[2]) / layer_height; const auto *bottom_data_0 = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *top_data_0 = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp index 94e7f033a95..09313e30bd6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -341,27 +339,27 @@ void MKLDNNExperimentalDetectronROIFeatureExtractorNode::initSupportedPrimitiveD if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronROIFeatureExtractorNode::execute(mkldnn::stream strm) { - const int levels_num = inDims.size() - INPUT_FEATURES_START; - const int num_rois = getParentEdgeAt(INPUT_ROIS)->getDims()[0]; - const int channels_num = getParentEdgeAt(INPUT_FEATURES_START)->getDims()[1]; + const int levels_num = inputShapes.size() - INPUT_FEATURES_START; + const int num_rois = getParentEdgeAt(INPUT_ROIS)->getShape().getStaticDims()[0]; + const int channels_num = getParentEdgeAt(INPUT_FEATURES_START)->getShape().getStaticDims()[1]; const int feaxels_per_roi = pooled_height_ * pooled_width_ * channels_num; auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); auto *output_rois_features = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROI_FEATURES)[0]->getMemoryPtr()->GetPtr()); float *output_rois = nullptr; - if (OUTPUT_ROIS < outDims.size()) { + if (OUTPUT_ROIS < outputShapes.size()) { output_rois = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); } @@ -381,8 +379,8 @@ void MKLDNNExperimentalDetectronROIFeatureExtractorNode::execute(mkldnn::stream const int level_rois_num = rois_per_level[i + 1] - level_rois_offset; if (level_rois_num > 0) { auto *featuremap = reinterpret_cast(getParentEdgeAt(INPUT_FEATURES_START + i)->getMemoryPtr()->GetPtr()); - const int featuremap_height = getParentEdgeAt(INPUT_FEATURES_START + i)->getDims()[2]; - const int featuremap_width = getParentEdgeAt(INPUT_FEATURES_START + i)->getDims()[3]; + const int featuremap_height = getParentEdgeAt(INPUT_FEATURES_START + i)->getShape().getStaticDims()[2]; + const int featuremap_width = getParentEdgeAt(INPUT_FEATURES_START + i)->getShape().getStaticDims()[3]; ROIAlignForward_cpu_kernel(feaxels_per_roi * level_rois_num, featuremap, 1.0f / pyramid_scales_[i], diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp index d543658f78e..f77c3fcb2b0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -51,14 +49,14 @@ void MKLDNNExperimentalDetectronTopKROIsNode::initSupportedPrimitiveDescriptors( if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNExperimentalDetectronTopKROIsNode::execute(mkldnn::stream strm) { - const int input_rois_num = getParentEdgeAt(INPUT_ROIS)->getDims()[0]; + const int input_rois_num = getParentEdgeAt(INPUT_ROIS)->getShape().getStaticDims()[0]; const int top_rois_num = (std::min)(max_rois_num_, input_rois_num); auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp index d4c5d303796..13ada3cf81d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include #include @@ -411,8 +409,8 @@ void MKLDNNExtractImagePatchesNode::initSupportedPrimitiveDescriptors() { if (_supported_precisions_sizes.find(precision.size()) == _supported_precisions_sizes.end()) IE_THROW() << errorPrefix << "has unsupported precision: " << precision.name(); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}}, - {{TensorDescCreatorTypes::ncsp, precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, precision}}, + {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); } @@ -421,12 +419,12 @@ void MKLDNNExtractImagePatchesNode::execute(mkldnn::stream strm) { char *dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); const size_t dtype_size = getOriginalInputPrecisionAtPort(0).size(); - const auto& inDims = getParentEdgeAt(0)->getDims().ToSizeVector(); + const auto& inDims = getParentEdgeAt(0)->getShape().getStaticDims(); const size_t IC = inDims[1]; const size_t IH = inDims[2]; const size_t IW = inDims[3]; - const auto& outDims = getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(); + const auto& outDims = getChildEdgesAtPort(0)[0]->getShape().getStaticDims(); const size_t OB = outDims[0]; const size_t OH = outDims[2]; const size_t OW = outDims[3]; @@ -436,8 +434,8 @@ void MKLDNNExtractImagePatchesNode::execute(mkldnn::stream strm) { const size_t RH = _rates[0], RW = _rates[1]; const size_t PT = _pad_top, PL = _pad_left; - const std::vector istrides = getParentEdgeAt(0)->getDesc().getBlockingDesc().getStrides(); - const std::vector ostrides = getChildEdgesAtPort(0)[0]->getDesc().getBlockingDesc().getStrides(); + const std::vector istrides = getParentEdgeAt(0)->getMemory().GetDescWithType().getStrides(); + const std::vector ostrides = 
getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); const std::vector ostrides_partial = {ostrides[0], KW * IC * ostrides[1], IC * ostrides[1], ostrides[1]}; if (extract_image_patches_kernel) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp index b12bed6a476..b08ebae30f4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp @@ -19,6 +19,7 @@ #include "ie_parallel.hpp" #include +#include // Quantization ranges validation is switched off by default in order to avoid regressions on user side // #define VALIDATE_QUANTIZATION_RANGES @@ -219,7 +220,7 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ this->preamble(); - if (jqp_.src_layout == Layout::CHW || jqp_.src_layout == Layout::NCHW || jqp_.src_layout == Layout::NCDHW) + if (jqp_.is_planar) compute_planar(); else compute_generic(); @@ -1090,31 +1091,23 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr MKLDNNFakeQuantizeNode::getDataFormats() const { +std::vector MKLDNNFakeQuantizeNode::getDataFormats() const { // Special case for first FQ in the network - if (getParentEdgesAtPort(0)[0]->getDims()[getAxis()] == 3) { - return { MKLDNNMemory::GetPlainFormat(getParentEdgesAtPort(0)[0]->getDims()) }; + if (getParentEdgesAtPort(0)[0]->getShape().getStaticDims()[getAxis()] == 3) { + return { LayoutType::ncsp }; } else { if (isBinarization()) { - return {memory::format_tag::nhwc}; + return { LayoutType::nspc }; } else { - switch (getParentEdgesAtPort(0)[0]->getDims().ndims()) { - case 4: - if (getAxis() == 1) { - auto blkFormat = mayiuse(cpu::x64::avx512_common) ? memory::format_tag::nChw16c : memory::format_tag::nChw8c; - return {blkFormat, memory::format_tag::nhwc, memory::format_tag::nchw}; - } else { - return {memory::format_tag::nchw}; - } - case 5: - if (getAxis() == 1) { - auto blkFormat = mayiuse(cpu::x64::avx512_common) ? memory::format_tag::nCdhw16c : memory::format_tag::nCdhw8c; - return {blkFormat, memory::format_tag::ndhwc, memory::format_tag::ncdhw}; - } else { - return {memory::format_tag::ncdhw}; - } - default: - return {MKLDNNMemory::GetPlainFormat(getParentEdgesAtPort(0)[0]->getDims())}; + if (one_of(getParentEdgesAtPort(0)[0]->getShape().getRank(), 4, 5)) { + if (getAxis() == 1) { + auto blkFormat = mayiuse(cpu::x64::avx512_common) ? 
LayoutType::nCsp16c : LayoutType::nCsp8c; + return { blkFormat, LayoutType::nspc, LayoutType::ncsp }; + } else { + return { LayoutType::ncsp }; + } + } else { + return { LayoutType::ncsp }; } } } @@ -1147,12 +1140,12 @@ void MKLDNNFakeQuantizeNode::getSupportedDescriptors() { IE_THROW() << errorPrefix << "has unsupported number of parent edges at port " << i; } - if (getParentEdgesAtPort(0)[0]->getDims().ndims() != getChildEdgesAtPort(0)[0]->getDims().ndims()) { + if (getParentEdgesAtPort(0)[0]->getShape().getRank() != getChildEdgesAtPort(0)[0]->getShape().getRank()) { IE_THROW() << errorPrefix << "has different ranks for input and output tensors"; } if (isBinarization()) { - if (getParentEdgesAtPort(0)[0]->getDims().ndims() != 4ul) { + if (getParentEdgesAtPort(0)[0]->getShape().getRank() != 4ul) { IE_THROW() << errorPrefix << "doesn't support input/output rank != 4"; } } @@ -1189,47 +1182,52 @@ void MKLDNNFakeQuantizeNode::initSupportedPrimitiveDescriptors() { } } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getInputPrecision()); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOutputPrecision()); - for (auto& fmt : getDataFormats()) { - LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; for (size_t i = 0; i < getParentEdges().size(); i++) { - DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; if (i == 0) { - dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDataType, fmt); + auto descCreator = BlockedDescCreator::getCommonCreators().at(fmt); + dataConfig.desc = descCreator->createUniqueDesc(getInputPrecision(), getParentEdgeAt(i)->getShape().getStaticDims()); } else { - dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), memory::data_type::f32, - MKLDNNMemory::GetPlainFormat(getParentEdgeAt(i)->getDims())); + auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); + dataConfig.desc = descCreator->createUniqueDesc(Precision::FP32, getParentEdgeAt(i)->getShape().getStaticDims()); } config.inConfs.push_back(dataConfig); } - DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmt); + auto descCreator = BlockedDescCreator::getCommonCreators().at(fmt); + dataConfig.desc = descCreator->createUniqueDesc(getOutputPrecision(), getChildEdgeAt(0)->getShape().getStaticDims()); config.outConfs.push_back(dataConfig); - supportedPrimitiveDescriptors.push_back({config, impl_type, fmt}); + supportedPrimitiveDescriptors.push_back({config, impl_type}); } } void MKLDNNFakeQuantizeNode::createPrimitive() { auto config = getSelectedPrimitiveDescriptor()->getConfig(); - auto inDims = config.inConfs[0].desc.getDims(); + auto inDims = config.inConfs[0].desc->getShape().getStaticDims(); jqp.c = inDims.size() > 1 ? 
inDims[1] : 1; - jqp.src_prc = config.inConfs[0].desc.getPrecision(); + jqp.src_prc = config.inConfs[0].desc->getPrecision(); jqp.wei_prc = Precision::FP32; - jqp.dst_prc = config.outConfs[0].desc.getPrecision(); + jqp.dst_prc = config.outConfs[0].desc->getPrecision(); - jqp.src_layout = config.inConfs[0].desc.getLayout(); + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + jqp.s_str = srcDesc.getStrides(); + + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + jqp.d_str = dstDesc.getStrides(); + + jqp.is_planar = srcDesc.hasLayoutType(LayoutType::ncsp) && one_of(srcDesc.getShape().getRank(), 3, 4, 5); jqp.op_type = getAlgorithm(); @@ -1258,7 +1256,7 @@ void MKLDNNFakeQuantizeNode::createPrimitive() { if (quantize_kernel) quantize_kernel->create_ker(); - size_t axisSize = getParentEdgeAt(0)->getDims()[getAxis()]; + size_t axisSize = getParentEdgeAt(0)->getShape().getStaticDims()[getAxis()]; size_t axisPaddedSize = rnd_up(axisSize, 16); MKLDNNMemoryDesc weightsDataDesc = {{(uint32_t)axisPaddedSize}, memory::data_type::f32, memory::format_tag::x}; @@ -1297,12 +1295,11 @@ void MKLDNNFakeQuantizeNode::executeReference() { auto src = reinterpret_cast(srcMemory->GetPtr()); - auto config = getSelectedPrimitiveDescriptor()->getConfig(); - auto srcDims = config.inConfs[0].desc.getDims(); - auto dstDims = config.outConfs[0].desc.getDims(); + auto srcDims = srcMemory->GetDesc().getShape().getStaticDims(); + auto dstDims = dstMemory->GetDesc().getShape().getStaticDims(); - auto s_str = config.inConfs[0].desc.getBlockingDesc().getStrides(); - auto d_str = config.outConfs[0].desc.getBlockingDesc().getStrides(); + auto s_str = jqp.s_str; + auto d_str = jqp.d_str; const int N = srcDims[0]; const int C = srcDims.size() > 1 ? srcDims[1] : 1; @@ -1419,10 +1416,9 @@ void MKLDNNFakeQuantizeNode::executeBinarization() { auto thresholds = reinterpret_cast(internalBlobMemory[0]->GetData()); auto output_mask = reinterpret_cast(internalBlobMemory[1]->GetData()); - auto config = getSelectedPrimitiveDescriptor()->getConfig(); - auto src_dims = config.inConfs[0].desc.getDims(); + auto src_dims = srcMemory->GetDesc().getShape().getStaticDims(); - std::vector s_str = config.inConfs[0].desc.getBlockingDesc().getStrides(); + std::vector s_str = jqp.s_str; size_t tmp = s_str[s_str.size() - 1]; for (int i = s_str.size() - 1; i > 1; i--) { s_str[i] = s_str[i - 1]; @@ -1463,24 +1459,23 @@ void MKLDNNFakeQuantizeNode::executeQuantization() { auto output_scale = reinterpret_cast(internalBlobMemory[4]->GetData()); auto output_shift = reinterpret_cast(internalBlobMemory[5]->GetData()); - auto config = getSelectedPrimitiveDescriptor()->getConfig(); - auto srcDims = config.inConfs[0].desc.getDims(); + auto& srcDesc = srcMemory->GetDesc(); + auto srcDims = srcDesc.getShape().getStaticDims(); - bool is_blk_format = jqp.src_layout != Layout::NHWC && jqp.src_layout != Layout::NDHWC; - int blk_size = (jqp.src_layout == Layout::CHW || - jqp.src_layout == Layout::NCHW || - jqp.src_layout == Layout::NCDHW) ? 1 : mayiuse(cpu::x64::avx512_common) ? 16 : 8; + bool is_blk_format = !srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5); + int blk_size = (srcDesc.hasLayoutType(LayoutType::ncsp) && one_of(srcDesc.getShape().getRank(), 3, 4, 5)) + ? 1 : mayiuse(cpu::x64::avx512_common) ? 
16 : 8; auto src_type_size = jqp.src_prc.size(); auto dst_type_size = jqp.dst_prc.size(); - std::vector s_str = config.inConfs[0].desc.getBlockingDesc().getStrides(); + auto s_str = jqp.s_str; - if (jqp.src_layout == BLOCKED) { + if (is_blk_format) { s_str[1] /= blk_size; } - if (jqp.src_layout == Layout::NHWC || jqp.src_layout == Layout::NDHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5)) { size_t tmp = s_str[s_str.size() - 1]; for (int i = s_str.size() - 1; i > 1; i--) { s_str[i] = s_str[i - 1]; @@ -1495,7 +1490,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() { const int H = srcDims.size() == 3 ? srcDims[2] : srcDims.size() > 3 ? srcDims[srcDims.size() - 2] : 1; const int W = srcDims.size() > 3 ? srcDims[srcDims.size() - 1] : 1; - if (jqp.src_layout == Layout::CHW) { + if (srcDesc.hasLayoutType(LayoutType::ncsp) && srcDesc.getShape().getRank() == 3) { parallel_nd(N, CB, D, [&](int n, int cb, int d) { auto arg = jit_quantize_call_args(); @@ -1542,7 +1537,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() { arg.src_step = is_blk_format ? (size_t) blk_size * src_type_size : (size_t) C * src_type_size; arg.dst_step = is_blk_format ? (size_t) blk_size * dst_type_size : (size_t) C * dst_type_size; - arg.block_size = (is_blk_format && jqp.src_layout != Layout::NC) ? (size_t) blk_size : nstl::min(blk_size, C - c); + arg.block_size = (is_blk_format && srcDims.size() != 2) ? (size_t) blk_size : nstl::min(blk_size, C - c); arg.work_amount = (size_t) W; (*quantize_kernel)(&arg); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h index 4430acac9ba..eb6a49b1210 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h @@ -17,12 +17,14 @@ namespace MKLDNNPlugin { struct jit_quantize_params { int c; + bool is_planar; InferenceEngine::Precision src_prc; InferenceEngine::Precision wei_prc; InferenceEngine::Precision dst_prc; - InferenceEngine::Layout src_layout; + std::vector s_str; + std::vector d_str; Algorithm op_type; }; @@ -109,7 +111,7 @@ public: private: void init() override; - std::vector getDataFormats() const; + std::vector getDataFormats() const; void executeReference(); void executeBinarization(); void executeQuantization(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp index e5b9ade8567..ee8dc1b730b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp @@ -12,6 +12,7 @@ #include #include #include "utils/general_utils.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -50,18 +51,18 @@ MKLDNNFullyConnectedNode::MKLDNNFullyConnectedNode(const std::shared_ptr MKLDNNFullyConnectedNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const { - if (dims.ndims() == 0) +std::vector MKLDNNFullyConnectedNode::getAvailableFormatsForDims(const Shape &dims) const { + if (dims.getRank() == 0) return {memory::format_tag::x}; - else if (dims.ndims() == 1) + else if (dims.getRank() == 1) return {memory::format_tag::x}; - else if (dims.ndims() == 2) + else if (dims.getRank() == 2) return {memory::format_tag::nc}; - else if (dims.ndims() == 3) + else if (dims.getRank() == 3) return {memory::format_tag::tnc}; - 
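In the FakeQuantize changes above, the kernel parameters no longer read strides and layout from the selected config's TensorDesc; instead createPrimitive caches them from the blocked memory descriptors, and a single is_planar flag replaces the old Layout comparisons. A hedged sketch of that idiom (the BlockedMemoryDesc template argument is an assumption, since the diff text elides template parameters):

// Sketch only: caching strides and the planar flag at primitive creation time.
void ExampleQuantizeNode::createPrimitive() {
    // BlockedMemoryDesc is assumed as the blocked-descriptor type; the diff elides the template argument.
    auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
    jqp.s_str = srcDesc.getStrides();   // reused later by executeReference()/executeQuantization()
    auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
    jqp.d_str = dstDesc.getStrides();
    // planar means plain ncsp layout with rank 3, 4 or 5 (the old CHW/NCHW/NCDHW checks)
    jqp.is_planar = srcDesc.hasLayoutType(LayoutType::ncsp) &&
                    one_of(srcDesc.getShape().getRank(), 3, 4, 5);
}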
else if (dims.ndims() == 4) + else if (dims.getRank() == 4) return {memory::format_tag::nChw8c, memory::format_tag::nChw16c, memory::format_tag::nhwc, memory::format_tag::nchw}; - else if (dims.ndims() == 5) + else if (dims.getRank() == 5) return {memory::format_tag::nCdhw8c, memory::format_tag::nCdhw16c, memory::format_tag::ndhwc, memory::format_tag::ncdhw}; return {memory::format_tag::any}; } @@ -100,23 +101,23 @@ void MKLDNNFullyConnectedNode::getSupportedDescriptors() { outputDataType = memory::data_type::bf16; } - MKLDNNDims inDims = getParentEdgeAt(0)->getDims(); - MKLDNNDims outDims = getChildEdgeAt(0)->getDims(); + const auto inDims = getParentEdgeAt(0)->getShape().getStaticDims(); + const auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); - if (inDims.ndims() == 3) { + if (inDims.size() == 3) { weightsDims = InferenceEngine::SizeVector({static_cast(outDims[2]), static_cast(inDims[2])}); } else { weightsDims.push_back(outDims[1]); - for (int i = 1; i < inDims.ndims(); i++) + for (int i = 1; i < inDims.size(); i++) weightsDims.push_back(inDims[i]); } biasesDims.push_back(weightsDims[0]); - for (auto format : getAvailableFormatsForDims(inDims)) { - MKLDNNMemoryDesc in_candidate(inDims, inputDataType, format); - MKLDNNMemoryDesc out_candidate(outDims, outputDataType, memory::format_tag::any); + for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { + auto in_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(inDims), inputDataType, format); + auto out_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(outDims), outputDataType, mkldnn::memory::format_tag::any); - createDescriptor({in_candidate}, {out_candidate}); + createDescriptorInternal(in_candidate, out_candidate); } } @@ -236,35 +237,40 @@ std::shared_ptr MKLDNNFullyConnectedNode::initPrimitiveA return attr; } -void MKLDNNFullyConnectedNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0]; +// WA: creation MKLDNNMemoryDesc with format == any is prohibited +// so we create mkldnn::memory::desc directly +// we need specific method and can't remove createDescriptor from base class because its used into initDescriptor +void MKLDNNFullyConnectedNode::createDescriptorInternal(const mkldnn::memory::desc &inputDesc, + const mkldnn::memory::desc &outputDesc) { + auto in_candidate = inputDesc; + auto out_candidate = outputDesc; - mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); - mkldnn::memory::data_type bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); - if (inDesc.getPrecision() == Precision::BF16) { + mkldnn::memory::data_type wdt = in_candidate.data_type(); + mkldnn::memory::data_type bdt = out_candidate.data_type(); + if (in_candidate.data_type() == mkldnn::memory::data_type::bf16) { bdt = mkldnn::memory::data_type::f32; - } else if (inDesc.getPrecision() == Precision::U8 || inDesc.getPrecision() == Precision::I8) { + } else if (in_candidate.data_type() == mkldnn::memory::data_type::u8 || in_candidate.data_type() == mkldnn::memory::data_type::s8) { wdt = memory::data_type::s8; if (withBiases) bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(BIAS_ID)); } - if (inDesc.getDims().size() == 3) { - auto inDims = inDesc.getDims(); - auto outDims = outDesc.getDims(); - InferenceEngine::SizeVector normalizedInDims = {inDims[0] * inDims[1], inDims[2]}; - 
InferenceEngine::SizeVector normalizedOutDims = {outDims[0] * outDims[1], outDims[2]}; - inDesc = InferenceEngine::TensorDesc(inDesc.getPrecision(), normalizedInDims, TensorDesc::getLayoutByDims(normalizedInDims)); - outDesc = InferenceEngine::TensorDesc(outDesc.getPrecision(), normalizedOutDims, TensorDesc::getLayoutByDims(normalizedOutDims)); + if (in_candidate.dims().size() == 3) { + auto inDims = in_candidate.dims(); + auto outDims = out_candidate.dims(); + auto normalizedInDims = {inDims[0] * inDims[1], inDims[2]}; + auto normalizedOutDims = {outDims[0] * outDims[1], outDims[2]}; + in_candidate = mkldnn::memory::desc(normalizedInDims, in_candidate.data_type(), + MKLDNNMemory::GetPlainFormatByRank(normalizedInDims.size())); + out_candidate = mkldnn::memory::desc(normalizedOutDims, out_candidate.data_type(), + MKLDNNMemory::GetPlainFormatByRank(normalizedOutDims.size())); } - MKLDNNMemoryDesc in_candidate(inDesc); - MKLDNNMemoryDesc out_candidate(outDesc); - MKLDNNMemoryDesc wgh_candidate(MKLDNNDims(weightsDims), wdt, mkldnn::memory::format_tag::any); + mkldnn::memory::desc wgh_candidate(MKLDNNDims(weightsDims), wdt, mkldnn::memory::format_tag::any); if (withBiases) { - MKLDNNMemoryDesc bias_candidate(MKLDNNDims(inDims[BIAS_ID]), bdt, memory::format_tag::any); + mkldnn::memory::desc bias_candidate(MKLDNNExtensionUtils::convertToDnnlDims(inputShapes[BIAS_ID].getStaticDims()), bdt, + mkldnn::memory::format_tag::any); MKLDNNDescriptor desc(std::shared_ptr( new inner_product_forward::desc(prop_kind::forward_scoring, in_candidate, wgh_candidate, bias_candidate, out_candidate))); @@ -277,40 +283,28 @@ void MKLDNNFullyConnectedNode::createDescriptor(const std::vector 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) - : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); - - if (desc.getLayout() == InferenceEngine::Layout::ANY) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - } else if (getParentEdgeAt(idx)->getDims().ndims() == 3) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - TensorDesc::getLayoutByDims(getParentEdgeAt(idx)->getDims().ToSizeVector()))); - } else { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); - } +void MKLDNNFullyConnectedNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + createDescriptorInternal(MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]), MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0])); } -MKLDNNMemoryDesc MKLDNNFullyConnectedNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)); - if (desc.getLayout() == InferenceEngine::Layout::ANY) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - desc.getLayout())); - } else if (getChildEdgeAt(idx)->getDims().ndims() == 3) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - TensorDesc::getLayoutByDims(getChildEdgeAt(idx)->getDims().ToSizeVector()))); - } else { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getChildEdgeAt(idx)->getDims().ToSizeVector(), - 
desc.getBlockingDesc())); +std::unique_ptr MKLDNNFullyConnectedNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); + + if (getParentEdgeAt(idx)->getShape().getRank() == 3) { + desc = MKLDNNMemoryDesc(getParentEdgeAt(idx)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(idx)->getShape().getRank())); } + return MKLDNNPlugin::make_unique(std::move(desc)); +} + +std::unique_ptr MKLDNNFullyConnectedNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)); + if (getChildEdgeAt(idx)->getShape().getRank() == 3) { + desc = MKLDNNMemoryDesc(getChildEdgeAt(idx)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), + MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(idx)->getShape().getRank())); + } + return MKLDNNPlugin::make_unique(std::move(desc)); } InferenceEngine::Precision MKLDNNFullyConnectedNode::getRuntimePrecision() const { @@ -324,7 +318,7 @@ InferenceEngine::Precision MKLDNNFullyConnectedNode::getRuntimePrecision() const } } - return MKLDNNExtensionUtils::getMaxPrecision(inputPrecisions); + return getMaxPrecision(inputPrecisions); } REG_MKLDNN_PRIM_FOR(MKLDNNFullyConnectedNode, FullyConnected); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h index 63b1e88ae6f..01820fdfcc3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h @@ -16,7 +16,7 @@ class MKLDNNFullyConnectedNode : public MKLDNNNode { public: MKLDNNFullyConnectedNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - std::vector getAvailableFormatsForDims(const MKLDNNDims &dims) const override; + std::vector getAvailableFormatsForDims(const Shape &dims) const override; void getSupportedDescriptors() override; void createPrimitive() override; void execute(mkldnn::stream strm) override; @@ -27,15 +27,15 @@ public: } const std::vector& getPrimitivesPriority() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; size_t descInputNumbers(MKLDNNDescriptor desc) override { return static_cast(getOriginalInputsNumber()); } - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; InferenceEngine::Precision getRuntimePrecision() const override; @@ -47,6 +47,9 @@ protected: std::shared_ptr initPrimitiveAttr(); private: + void createDescriptorInternal(const mkldnn::memory::desc &inputDesc, + const mkldnn::memory::desc &outputDesc); + InferenceEngine::SizeVector weightsDims; InferenceEngine::SizeVector biasesDims; diff --git 
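One detail worth calling out in the FullyConnected changes: rank-3 inputs are still collapsed to 2D before the inner_product descriptor is built, only now directly on mkldnn::memory::desc dims. For an input of shape [N, T, C] the descriptor describes an [N*T, C] matrix. A standalone restatement of that normalization (illustrative helper, not part of the patch):

// {N, T, C} -> {N * T, C}, matching the normalizedInDims/normalizedOutDims computation above.
// mkldnn::memory::dims comes from the mkldnn/oneDNN headers already used by these files.
static mkldnn::memory::dims normalize3dTo2d(const mkldnn::memory::dims& d) {
    return {d[0] * d[1], d[2]};
}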
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp index e3e14e35691..e4da50abe8a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp @@ -86,9 +86,9 @@ void MKLDNNGatherElementsNode::initSupportedPrimitiveDescriptors() { dataTypeSize_ = inDataPrecision.size(); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, - {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}}, + {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } @@ -98,7 +98,7 @@ void MKLDNNGatherElementsNode::directExecution() { const auto *indices = reinterpret_cast(getParentEdgeAt(indicesIndex_)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - const int outSize = getChildEdgeAt(0)->getBlob()->size(); + const int outSize = getChildEdgeAt(0)->getShape().getElementsCount(); auto threadBody = [&](const int ithr, const int nthr) { int start(0lu), end(0lu); splitter(outSize, nthr, ithr, start, end); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp index ee7623f9b48..75ee34dbda5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp @@ -89,9 +89,9 @@ void MKLDNNGatherNDNode::initSupportedPrimitiveDescriptors() { _dataTypeSize = inDataPrecision.size(); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, inDataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, - {{TensorDescCreatorTypes::ncsp, inDataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, inDataPrecision}, + {LayoutType::ncsp, Precision::I32}}, + {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } @@ -101,10 +101,11 @@ void MKLDNNGatherNDNode::gatherElementwise() { const auto *indices = reinterpret_cast(getParentEdgeAt(_indicesIndex)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto strides = getParentEdgeAt(_dataIndex)->getDesc().getBlockingDesc().getStrides(); + auto strides = getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType().getStrides(); const size_t* srcMultipliers = strides.data() + _batchDims; - const size_t cycles = getChildEdgeAt(0)->getBlob()->byteSize() / (sizeof(dataType) * _batchNum); + const size_t cycles = getChildEdgeAt(0)->getShape().getElementsCount() * + getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().size() / (sizeof(dataType) * _batchNum); const size_t CS = cycles * _sliceRank; const size_t CB = cycles * _blockSize; const size_t workAmount = _batchNum * cycles; @@ -149,11 +150,11 @@ void MKLDNNGatherNDNode::gatherBlocks() { std::vector srcMultipliers(_sliceRank); for (size_t i = 0; i < _sliceRank ; i++) - srcMultipliers[i] = _dataTypeSize * getParentEdgeAt(_dataIndex)->getDesc().getBlockingDesc().getStrides()[i + _batchDims]; + srcMultipliers[i] = _dataTypeSize * getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType().getStrides()[i + _batchDims]; const size_t batchStep = _batchStep * _dataTypeSize; const size_t dataStep = _blockSize * _dataTypeSize; - const size_t cycles = 
getChildEdgeAt(0)->getBlob()->byteSize() / (dataStep * _batchNum); + const size_t cycles = getChildEdgeAt(0)->getMemory().GetSize() / (dataStep * _batchNum); const size_t CS = cycles * _sliceRank; const size_t CB = cycles * dataStep; const size_t workAmount = _batchNum * cycles; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp index ade92f6a4a0..f41a57730a5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp @@ -75,10 +75,10 @@ void MKLDNNGatherNode::initSupportedPrimitiveDescriptors() { return; Precision dataPrecision = getOriginalInputPrecisionAtPort(GATHER_DATA); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::I32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, - {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, dataPrecision}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, + {{LayoutType::ncsp, dataPrecision}}, impl_desc_type::ref_any); } @@ -92,10 +92,10 @@ void MKLDNNGatherNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << errorPrefix_ << " has unidentified preferable primitive descriptor."; - const SizeVector srcDims = getParentEdgeAt(GATHER_DATA)->getDims().ToSizeVector(); - const SizeVector idxDims = getParentEdgeAt(GATHER_INDEXES)->getDims().ToSizeVector(); - const SizeVector dstDims = getChildEdgeAt(0)->getDims().ToSizeVector(); - dataSize = getParentEdgeAt(GATHER_DATA)->getDesc().getPrecision().size(); + const SizeVector srcDims = getParentEdgeAt(GATHER_DATA)->getShape().getStaticDims(); + const SizeVector idxDims = getParentEdgeAt(GATHER_INDEXES)->getShape().getStaticDims(); + const SizeVector dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + dataSize = getParentEdgeAt(GATHER_DATA)->getMemory().GetDesc().getPrecision().size(); indexRange = srcDims[axis]; batchSize = std::accumulate(srcDims.begin(), srcDims.begin() + batchDims, 1, std::multiplies()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp index ce396446df2..89fb6c08167 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -66,11 +65,11 @@ void MKLDNNGatherTreeNode::initSupportedPrimitiveDescriptors() { IE_THROW() << errorPrefix << " has incorrect input/output data precision. 
Must be the same."; } - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp, precision}}, - {{TensorDescCreatorTypes::ncsp, precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, precision}, + {LayoutType::ncsp, precision}, + {LayoutType::ncsp, precision}, + {LayoutType::ncsp, precision}}, + {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); } @@ -85,16 +84,16 @@ template void MKLDNNGatherTreeNode::gatherTreeKernel() noexcept { const auto *step_idx = reinterpret_cast(getParentEdgeAt(GATHER_TREE_STEP_IDX)->getMemoryPtr()->GetPtr()); const auto * const parent_idx = reinterpret_cast(getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemoryPtr()->GetPtr()); - const size_t parent_idx_size = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getDims().size() - - getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getDesc().getBlockingDesc().getOffsetPadding(); + const size_t parent_idx_size = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getShape().getElementsCount() + - getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemory().GetDescWithType().getOffsetPadding(); const auto *max_seq_len = reinterpret_cast(getParentEdgeAt(GATHER_TREE_MAX_SEQ_LEN)->getMemoryPtr()->GetPtr()); auto end_token = (reinterpret_cast(getParentEdgeAt(GATHER_TREE_END_TOKEN)->getMemoryPtr()->GetPtr()))[0]; auto * final_idx = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - SizeVector step_idx_dims = getParentEdgeAt(GATHER_TREE_STEP_IDX)->getDims().ToSizeVector(); - SizeVector parent_idx_dims = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getDims().ToSizeVector(); - SizeVector max_seq_len_dims = getParentEdgeAt(GATHER_TREE_MAX_SEQ_LEN)->getDims().ToSizeVector(); - SizeVector final_idx_dims = getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(); + SizeVector step_idx_dims = getParentEdgeAt(GATHER_TREE_STEP_IDX)->getShape().getStaticDims(); + SizeVector parent_idx_dims = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getShape().getStaticDims(); + SizeVector max_seq_len_dims = getParentEdgeAt(GATHER_TREE_MAX_SEQ_LEN)->getShape().getStaticDims(); + SizeVector final_idx_dims = getChildEdgesAtPort(0)[0]->getShape().getStaticDims(); int32_t max_time = step_idx_dims[0]; const size_t batch_size = step_idx_dims[1]; const size_t beam_width = step_idx_dims[2]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp index 86f89ccea7c..ef87345daae 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp @@ -8,6 +8,7 @@ #include #include #include +#include "cpu_memory_desc_utils.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -22,6 +23,42 @@ void MKLDNNGenericNode::getSupportedDescriptors() { } } +NodeConfig MKLDNNGenericNode::convertLayerToNodeConfig(const InferenceEngine::LayerConfig &layerConfig) { + NodeConfig config; + config.dynBatchSupport = layerConfig.dynBatchSupport; + config.inConfs.resize(layerConfig.inConfs.size()); + for (size_t i = 0; i < layerConfig.inConfs.size(); i++) { + config.inConfs[i].inPlace = layerConfig.inConfs[i].inPlace; + config.inConfs[i].constant = layerConfig.inConfs[i].constant; + config.inConfs[i].desc = MemoryDescUtils::convertToMKLDNNMemoryDesc(layerConfig.inConfs[i].desc).clone(); + } + config.outConfs.resize(layerConfig.outConfs.size()); + for (size_t i = 0; i < layerConfig.outConfs.size(); i++) 
{ + config.outConfs[i].inPlace = layerConfig.outConfs[i].inPlace; + config.outConfs[i].constant = layerConfig.outConfs[i].constant; + config.outConfs[i].desc = MemoryDescUtils::convertToMKLDNNMemoryDesc(layerConfig.outConfs[i].desc).clone(); + } + return config; +} + +InferenceEngine::LayerConfig MKLDNNGenericNode::convertNodeToLayerConfig(const NodeConfig &nodeConfig) { + InferenceEngine::LayerConfig config; + config.dynBatchSupport = nodeConfig.dynBatchSupport; + config.inConfs.resize(nodeConfig.inConfs.size()); + for (size_t i = 0; i < nodeConfig.inConfs.size(); i++) { + config.inConfs[i].inPlace = nodeConfig.inConfs[i].inPlace; + config.inConfs[i].constant = nodeConfig.inConfs[i].constant; + config.inConfs[i].desc = MemoryDescUtils::convertToTensorDesc(*nodeConfig.inConfs[i].desc); + } + config.outConfs.resize(nodeConfig.outConfs.size()); + for (size_t i = 0; i < nodeConfig.outConfs.size(); i++) { + config.outConfs[i].inPlace = nodeConfig.outConfs[i].inPlace; + config.outConfs[i].constant = nodeConfig.outConfs[i].constant; + config.outConfs[i].desc = MemoryDescUtils::convertToTensorDesc(*nodeConfig.outConfs[i].desc); + } + return config; +} + void MKLDNNGenericNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -35,7 +72,7 @@ void MKLDNNGenericNode::initSupportedPrimitiveDescriptors() { } for (auto& config : configs) { - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); + supportedPrimitiveDescriptors.emplace_back(convertLayerToNodeConfig(config), impl_desc_type::unknown); } } if (impls.empty()) { @@ -109,9 +146,9 @@ void MKLDNNGenericNode::execLayer() { std::vector inputs; std::vector constInputs; std::vector inputDescs; - std::vector outputShapes; + std::vector execOutputShapes; for (size_t i = 0; i < getParentEdges().size(); i++) { - auto inputBlob = getParentEdgeAt(i)->getBlob(); + auto inputBlob = MemoryDescUtils::interpretAsBlob(getParentEdgeAt(i)->getMemory()); inputs.push_back(inputBlob); constInputs.push_back(inputBlob); if (isDynBatch && dynBatchLim >= inputs[inputs.size() - 1]->getTensorDesc().getDims()[0]) { @@ -137,14 +174,14 @@ void MKLDNNGenericNode::execLayer() { } } std::vector outputs; - for (size_t i = 0; i < outDims.size(); i++) { + for (size_t i = 0; i < outputShapes.size(); i++) { if (isDynBatch) { auto out_edge = getChildEdgesAtPort(i)[0]; - auto td = out_edge->getBlob()->getTensorDesc(); - td.setDims(outputShapes[i]); + auto td = MemoryDescUtils::convertToTensorDesc(out_edge->getMemory().GetDesc()); + td.setDims(execOutputShapes[i]); outputs.push_back(make_blob_with_precision(td, out_edge->getMemory().GetData())); } else { - outputs.push_back(getChildEdgesAtPort(i)[0]->getBlob()); + outputs.push_back(MemoryDescUtils::interpretAsBlob(getChildEdgesAtPort(i)[0]->getMemory())); } } InferenceEngine::ResponseDesc resp; @@ -154,8 +191,8 @@ void MKLDNNGenericNode::execLayer() { } } -void MKLDNNGenericNode::initDescriptor(const InferenceEngine::LayerConfig &config) { - InferenceEngine::LayerConfig rightConfig = config; +void MKLDNNGenericNode::initDescriptor(const NodeConfig &config) { + NodeConfig rightConfig = config; InferenceEngine::StatusCode rc; InferenceEngine::ResponseDesc resp; @@ -190,14 +227,15 @@ void MKLDNNGenericNode::initDescriptor(const InferenceEngine::LayerConfig &confi impls.clear(); impls.emplace_back(selectedImpl); - rc = impls[0]->init(rightConfig, &resp); + auto ieConfig = convertNodeToLayerConfig(rightConfig); + rc = impls[0]->init(ieConfig, &resp); if (rc != 
InferenceEngine::OK) { IE_THROW() << resp.msg; } - + rightConfig = convertLayerToNodeConfig(ieConfig); auto descriptor = getSelectedPrimitiveDescriptor(); if (descriptor != nullptr) { - descriptor->getConfig() = rightConfig; + descriptor->setConfig(rightConfig); } bool isConst = !rightConfig.inConfs.empty() || !rightConfig.outConfs.empty(); for (const auto &inConf : rightConfig.inConfs) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h index f93b79c7852..63d0d5e20f0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h @@ -29,12 +29,15 @@ public: return false; } - void initDescriptor(const InferenceEngine::LayerConfig& config) override; + void initDescriptor(const NodeConfig& config) override; void execLayer(); void cleanup() override; protected: + NodeConfig convertLayerToNodeConfig(const InferenceEngine::LayerConfig &layerConfig); + InferenceEngine::LayerConfig convertNodeToLayerConfig(const NodeConfig &nodeConfig); + InferenceEngine::ILayerImplFactory::Ptr extFactory; std::vector impls; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp index 0dbe8dee59e..605aa2d6af5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -46,8 +44,8 @@ void MKLDNNGRNNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32, false, 0}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32, false, 0}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32, false, 0}}, + {{LayoutType::ncsp, Precision::FP32, false, 0}}, impl_desc_type::ref_any); } @@ -55,7 +53,7 @@ void MKLDNNGRNNode::execute(mkldnn::stream strm) { const float* src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - SizeVector dims = getParentEdgeAt(0)->getDims().ToSizeVector(); + SizeVector dims = getParentEdgeAt(0)->getShape().getStaticDims(); int N = static_cast((dims.size() > 0) ? dims[0] : 1); int C = static_cast((dims.size() > 1) ? dims[1] : 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp index cdb553309b8..33b6fdab4f4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp @@ -246,7 +246,7 @@ MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr& op, const } void MKLDNNInputNode::cloneBlobIfRequired() { - MKLDNNDims dims(constOp->get_shape().empty() ? ngraph::Shape(1, 1) : constOp->get_shape()); + std::vector dims(constOp->get_shape().empty() ? 
ngraph::Shape(1, 1) : constOp->get_shape()); const auto prec = convertPrecision(constOp->get_element_type()); const size_t size = dims.size(); MKLDNNMemoryDesc memDesc(dims, MKLDNNExtensionUtils::IEPrecisionToDataType(prec)); @@ -349,15 +349,15 @@ void MKLDNNInputNode::cloneBlobIfRequired() { } } -MKLDNNInputNode::MKLDNNInputNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &prc, const std::string &name, +MKLDNNInputNode::MKLDNNInputNode(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name, const std::string &type, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(type, name, eng, cache) { constant = ConstantType::NoConst; if (getType() == Input) { - outDims.emplace_back(dims); + outputShapes.emplace_back(shape); addOriginalOutputPrecision(prc); } else if (getType() == Output) { - inDims.emplace_back(dims); + inputShapes.emplace_back(shape); addOriginalInputPrecision(prc); } } @@ -388,42 +388,29 @@ void MKLDNNInputNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - LayerConfig config; - config.dynBatchSupport = true; + std::vector inPortConfs; + std::vector outPortConfs; + if (getType() == Input || getType() == MemoryInput) { precision = getOriginalOutputPrecisionAtPort(0); if (precision == Precision::U16 || isMeanImage) { precision = Precision::FP32; } - DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto mem_tdesc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); - dataConfig.desc = mem_tdesc; - config.outConfs.push_back(dataConfig); - // ReadValue operation expects constant input + outPortConfs.push_back({LayoutType::ncsp, precision}); if (!getParentEdges().empty()) { - DataConfig inConfig; - inConfig.inPlace = -1; - inConfig.constant = true; - inConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); - config.inConfs.push_back(inConfig); + inPortConfs.push_back({LayoutType::ncsp, precision, true}); } } else if (getType() == Output) { precision = getOriginalInputPrecisionAtPort(0); if (precision == Precision::U16) precision = Precision::FP32; - DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto mem_tdesc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType); - dataConfig.desc = mem_tdesc; - config.inConfs.push_back(dataConfig); + inPortConfs.push_back({LayoutType::ncsp, precision}); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); + + addSupportedPrimDesc(inPortConfs, + outPortConfs, + impl_desc_type::unknown); } void MKLDNNInputNode::createPrimitive() { @@ -440,7 +427,7 @@ void MKLDNNInputNode::createPrimitive() { << " from node " << getParentEdgeAt(i)->getParent()->getName() << "."; } - const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor(); + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h index 872f8e14f8e..8c57ac88730 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h @@ 
-14,7 +14,7 @@ namespace MKLDNNPlugin { class MKLDNNInputNode : public MKLDNNNode { public: MKLDNNInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - MKLDNNInputNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &prc, const std::string &name, + MKLDNNInputNode(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name, const std::string &type, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp index caedec83ee8..df6e4930b54 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp @@ -1829,7 +1829,7 @@ void MKLDNNInterpolateNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges"; - srcDim = getParentEdgeAt(DATA_ID)->getDims().ToSizeVector(); + srcDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); int dataRank = srcDim.size(); // get pad @@ -1868,7 +1868,7 @@ void MKLDNNInterpolateNode::getSupportedDescriptors() { } else { srcDimPad = srcDim; } - dstDim = getChildEdgeAt(0)->getDims().ToSizeVector(); + dstDim = getChildEdgeAt(0)->getShape().getStaticDims(); } void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { @@ -1902,7 +1902,7 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { inputPrec = inputPrecision; outputPrec = outputPrecision; - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; if (isAxesSpecified) { config.inConfs.resize(4); @@ -1916,22 +1916,26 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { auto axesType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::I32); auto pushDesc = [&](memory::format_tag dataFormat, impl_desc_type implDetail) { - config.inConfs[DATA_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA_ID)->getDims(), inputDataType, dataFormat); - config.inConfs[TARGET_SHAPE_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(TARGET_SHAPE_ID)->getDims(), targetShapeType, memory::format_tag::x); - config.inConfs[SCALES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(SCALES_ID)->getDims(), scalesType, memory::format_tag::x); + config.inConfs[DATA_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), + inputDataType, dataFormat); + config.inConfs[TARGET_SHAPE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(TARGET_SHAPE_ID)->getShape().getStaticDims(), + targetShapeType, memory::format_tag::x); + config.inConfs[SCALES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(SCALES_ID)->getShape().getStaticDims(), scalesType, + memory::format_tag::x); if (isAxesSpecified) - config.inConfs[AXES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(AXES_ID)->getDims(), axesType, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, dataFormat); - supportedPrimitiveDescriptors.push_back({config, implDetail, dataFormat}); + config.inConfs[AXES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXES_ID)->getShape().getStaticDims(), axesType, + memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, dataFormat); + 
supportedPrimitiveDescriptors.push_back({config, implDetail}); }; - auto channels = getParentEdgeAt(DATA_ID)->getDims().ndims() > 1 ? getParentEdgeAt(DATA_ID)->getDims()[1] : 1; + auto channels = getParentEdgeAt(DATA_ID)->getShape().getRank() > 1 ? getParentEdgeAt(DATA_ID)->getShape().getStaticDims()[1] : 1; if (!mayiuse(cpu::x64::sse41) || mode == InterpolateMode::linear) { - pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), ref); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), ref); } else { // blk and by_channel JIT kernel on sse41 or above machine - if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 4) { + if (getParentEdgeAt(DATA_ID)->getShape().getRank() == 4) { if (mayiuse(cpu::x64::avx512_common)) { pushDesc(memory::format_tag::nhwc, jit_avx512); if (channels != 1) @@ -1945,7 +1949,7 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { if (channels != 1) pushDesc(memory::format_tag::nChw8c, jit_sse42); } - } else if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 5 && mode != InterpolateMode::cubic) { + } else if (getParentEdgeAt(DATA_ID)->getShape().getRank() == 5 && mode != InterpolateMode::cubic) { if (mayiuse(cpu::x64::avx512_common)) { pushDesc(memory::format_tag::ndhwc, jit_avx512); if (channels != 1) @@ -1963,7 +1967,7 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { // planar for 1.ref on machine without sse41(if no sse41, canFuse() is false). 2.JIT kernel for f32 && avx2(gather).(with fuse) if (mayiuse(cpu::x64::avx2) && inputPrec == Precision::FP32) { - pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), jit_avx2); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), jit_avx2); } } } @@ -1989,11 +1993,10 @@ void MKLDNNInterpolateNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; - auto selectedPD = getSelectedPrimitiveDescriptor(); auto jcp = jit_interpolate_config_params(); jcp.mode = mode; - jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[0].desc.getPrecision()); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc.getPrecision()); + jcp.src_dt = getParentEdgeAt(0)->getMemory().GetDataType(); + jcp.dst_dt = getChildEdgeAt(0)->getMemory().GetDataType(); jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.indices_size = sizeof(int); @@ -2008,9 +2011,10 @@ void MKLDNNInterpolateNode::createPrimitive() { jcp.ID = srcDimPad5d[2]; jcp.spatial_dim_size = spatialDimSize; - if (getChildEdgeAt(0)->getMemory().GetDesc().isPlainFormat()) { + if (getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { jcp.layout = InterpolateLayoutType::planar; - } else if (getChildEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat()) { + } else if (getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c)) { jcp.layout = InterpolateLayoutType::block; } else { jcp.layout = InterpolateLayoutType::by_channel; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp index 5750f8517b0..702d5b42f0c 100644 --- 
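The Interpolate createPrimitive change above replaces the old isPlainFormat()/isBlockedCFormat() checks with hasLayoutType() queries when picking the kernel layout. Restated in isolation (a sketch; the free function is hypothetical and the MemoryDesc parameter type is an assumption, as the diff elides template parameters):

// Maps the output memory descriptor to the kernel layout, as in MKLDNNInterpolateNode::createPrimitive.
InterpolateLayoutType selectLayout(const MemoryDesc& desc) {
    if (desc.hasLayoutType(LayoutType::ncsp))
        return InterpolateLayoutType::planar;       // plain NCHW-like layout
    if (desc.hasLayoutType(LayoutType::nCsp8c) || desc.hasLayoutType(LayoutType::nCsp16c))
        return InterpolateLayoutType::block;        // channel-blocked layouts
    return InterpolateLayoutType::by_channel;       // NHWC/NDHWC-like
}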
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp @@ -64,8 +64,8 @@ void MKLDNNLogSoftmaxNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp index a3460091ecf..b107fca7834 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp @@ -6,6 +6,7 @@ #include #include #include +#include using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -88,19 +89,20 @@ void MKLDNNLrnNode::getSupportedDescriptors() { precision = InferenceEngine::Precision::FP32; auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto parentDims = getParentEdgeAt(0)->getDims(); + const auto &parentShape = getParentEdgeAt(0)->getShape(); + const auto parentStaticDims = parentShape.getStaticDims(); - for (auto format : getAvailableFormatsForDims(parentDims)) { - MKLDNNMemoryDesc in_candidate(parentDims, inputDataType, format); - createDescriptor({in_candidate}, {}); + for (auto format : getAvailableFormatsForDims(parentShape)) { + auto in_candidate = MKLDNNPlugin::make_unique(parentStaticDims, inputDataType, format); + createDescriptor({in_candidate.get()}, {}); } } -MKLDNNMemoryDesc MKLDNNLrnNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +std::unique_ptr MKLDNNLrnNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx > 0) { - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(getOriginalInputPrecisions()[idx], - getParentEdgeAt(idx)->getDims().ToSizeVector(), - TensorDesc::getLayoutByDims(getParentEdgeAt(idx)->getDims().ToSizeVector()))); + return MKLDNNPlugin::make_unique(getParentEdgeAt(idx)->getShape().getStaticDims(), + MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[idx]), + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(idx)->getShape().getRank())); } else { return MKLDNNNode::getSrcMemDesc(primitive_desc_it, idx); } @@ -123,12 +125,12 @@ bool MKLDNNLrnNode::created() const { return getType() == Lrn; } -void MKLDNNLrnNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { +void MKLDNNLrnNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { mkldnn::algorithm alg = isAcrossMaps ? 
mkldnn::algorithm::lrn_across_channels : mkldnn::algorithm::lrn_within_channel; - MKLDNNMemoryDesc in_candidate(inputDesc[0]); MKLDNNDescriptor desc(std::shared_ptr( - new mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, alg, in_candidate, size, alpha, beta, k))); + new mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, alg, MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]), + size, alpha, beta, k))); descs.push_back(desc); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h index 53cfaa79682..295d16b369c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h @@ -17,12 +17,12 @@ public: MKLDNNLrnNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void getSupportedDescriptors() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; size_t descInputNumbers(MKLDNNDescriptor desc) override { return static_cast(getOriginalInputsNumber()); } - MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; void createPrimitive() override; bool created() const override; bool canBeInPlace() const override { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp index 908686bf6df..fed1158f97e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp @@ -49,18 +49,18 @@ void MKLDNNMathNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } void MKLDNNMathNode::execute(mkldnn::stream strm) { - size_t dataSize = getChildEdgeAt(0)->getBlob()->size(); + size_t dataSize = getChildEdgeAt(0)->getShape().getElementsCount(); const float *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp index b7f2c0a4277..a0a7f7eafa4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp @@ -55,8 +55,8 @@ MKLDNNMatMulNode::MKLDNNMatMulNode(const std::shared_ptr& op, cons errorPrefix = "Gemm node with name '" + getName() + "'"; const auto matMul = std::dynamic_pointer_cast(op); - alpha = 1; - beta = 1; + alpha = 1.f; + beta = 0.f; transposeA = matMul->get_transpose_a(); transposeB = matMul->get_transpose_b(); } else { @@ -70,14 +70,14 @@ void MKLDNNMatMulNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() 
<< errorPrefix << " has incorrect number of output edges for layer " << getName(); - auto inDims0 = getParentEdgeAt(0)->getDims(); - auto inDims1 = getParentEdgeAt(1)->getDims(); - auto outDims = getChildEdgeAt(0)->getDims(); + auto inDims0 = getParentEdgeAt(0)->getShape().getStaticDims(); + auto inDims1 = getParentEdgeAt(1)->getShape().getStaticDims(); + auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); - if (inDims0.ndims() != inDims1.ndims() || inDims0.ndims() != outDims.ndims()) + if (inDims0.size() != inDims1.size() || inDims0.size() != outDims.size()) IE_THROW() << errorPrefix << " has invalid dims count"; - int nDims = inDims0.ndims(); + int nDims = inDims0.size(); xAxis = nDims - 1; yAxis = nDims - 2; auto xAxis0 = transposeA ? yAxis : xAxis; @@ -135,22 +135,22 @@ void MKLDNNMatMulNode::initSupportedPrimitiveDescriptors() { auto inputDataType1 = MKLDNNExtensionUtils::IEPrecisionToDataType(inPrec1); auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; - auto createDataConfig = [](const MKLDNNDims& dims, memory::data_type dataType) -> InferenceEngine::DataConfig { - InferenceEngine::DataConfig dataConfig; + auto createDataConfig = [](const std::vector& dims, memory::data_type dataType) -> PortConfig { + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dims, dataType, MKLDNNMemory::GetPlainFormat(dims)); + dataConfig.desc = MKLDNNPlugin::make_unique(dims, dataType, MKLDNNMemory::GetPlainFormatByRank(dims.size())); return dataConfig; }; - config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getDims(), inputDataType0)); - config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getDims(), inputDataType1)); - config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getDims(), outputDataType)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType0)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getShape().getStaticDims(), inputDataType1)); + config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType)); - supportedPrimitiveDescriptors.push_back(PrimitiveDescInfo(config, impl_desc_type::gemm_any, MKLDNNMemory::GetPlainFormat(getChildEdgeAt(0)->getDims()))); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::gemm_any); } void MKLDNNMatMulNode::initOptimalPrimitiveDescriptor() { @@ -158,8 +158,9 @@ void MKLDNNMatMulNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; auto config = selected_pd->getConfig(); - if (isInitConfig(config)) - return; + + if (isConfigDefined(config)) + return; MKLDNNNode::initOptimalPrimitiveDescriptor(); @@ -179,6 +180,34 @@ void MKLDNNMatMulNode::createPrimitive() { IE_THROW() << errorPrefix << " did not allocate input memory"; if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; + + auto inDims0 = src0MemPtr->GetDims(); + auto outDims = dstMemPtr->GetDims(); + + params.src0_mem_ptr = src0MemPtr; + params.src1_mem_ptr = src1MemPtr; + params.dst_mem_ptr = dstMemPtr; + + params.ndims = outDims.size(); + + params.MB1 = 1; + params.MB2 = outDims.size() > 3 ? 
outDims[params.ndims - 3] : 1; + + params.M = outDims[yAxis]; + params.N = outDims[xAxis]; + params.K = transposeA ? inDims0[yAxis] : inDims0[xAxis]; + + params.transa = transposeA ? 'T' : 'N'; + params.transb = transposeB ? 'T' : 'N'; + + params.lda = transposeA ? params.M : params.K; + params.ldb = transposeB ? params.K : params.N; + params.ldc = params.N; + + params.shift1 = params.M * params.N * params.MB2; + params.shift2 = params.M * params.N; + + runtimePrecision = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(); } inline void process_gemm(char transa, char transb, int M, int N, int K, float alpha, const float *A, int lda, @@ -212,67 +241,57 @@ inline void process_gemm(char transa, char transb, int M, int N, int K, float al } template -void MKLDNNMatMulNode::process_data() { - auto inDims0 = getParentEdgeAt(0)->getDims(); - auto inDims1 = getParentEdgeAt(1)->getDims(); - auto outDims = getChildEdgeAt(0)->getDims(); +inline void MKLDNNMatMulNode::process_data() { + const T0* src0_ptr = reinterpret_cast(params.src0_mem_ptr->GetPtr()); + const T1* src1_ptr = reinterpret_cast(params.src1_mem_ptr->GetPtr()); + float* dst_ptr = reinterpret_cast(params.dst_mem_ptr->GetPtr()); - auto& srcMemory0 = getParentEdgeAt(0)->getMemory(); - auto& srcMemory1 = getParentEdgeAt(1)->getMemory(); - auto& dstMemory0 = getChildEdgeAt(0)->getMemory(); + const int MB = batchToProcess(); + if (params.ndims == 4) { + params.MB1 = MB; + } else if (params.ndims == 3) { + params.shift1 = params.shift1 * MB / params.MB2; + params.MB2 = MB; + } - const T0 *src0_ptr = reinterpret_cast(srcMemory0.GetPtr()); - const T1 *src1_ptr = reinterpret_cast(srcMemory1.GetData()); - float *dst_ptr = reinterpret_cast(dstMemory0.GetData()); - - int MB1 = outDims.ndims() == 4 ? batchToProcess() : 1; - int MB2 = outDims.ndims() == 3 ? batchToProcess() : outDims.ndims() > 3 ? outDims[outDims.ndims() - 3] : 1; - int M = outDims[yAxis]; - int N = outDims[xAxis]; - int K = transposeA ? inDims0[yAxis] : inDims0[xAxis]; - - const char transa = transposeA ? 'T' : 'N'; - const char transb = transposeB ? 'T' : 'N'; - - int lda = transposeA ? M : K; - int ldb = transposeB ? 
K : N; - int ldc = N; - - beta = 0.f; - - for (int b1 = 0; b1 < MB1; b1++) { + for (int b1 = 0; b1 < params.MB1; ++b1) { const T0 *a_ptr = src0_ptr; const T1 *b_ptr = src1_ptr; float *d_ptr = dst_ptr; - for (int b2 = 0; b2 < MB2; b2++) { - process_gemm(transa, transb, M, N, K, alpha, a_ptr, lda, b_ptr, ldb, beta, d_ptr, ldc); + for (int b2 = 0; b2 < params.MB2; ++b2) { + process_gemm(params.transa, params.transb, params.M, params.N, params.K, + alpha, a_ptr, params.lda, b_ptr, params.ldb, beta, d_ptr, params.ldc); a_ptr += aOffsets[0]; b_ptr += bOffsets[0]; - d_ptr += M * N; + d_ptr += params.shift2; } src0_ptr += aOffsets[1]; src1_ptr += bOffsets[1]; - dst_ptr += MB2 * M * N; + dst_ptr += params.shift1; } } void MKLDNNMatMulNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(0)->getDesc().getPrecision()) { - case Precision::FP32: + switch (runtimePrecision) { + case Precision::FP32: { process_data(); break; - case Precision::BF16: + } + case Precision::BF16: { process_data(); break; - case Precision::I8: + } + case Precision::I8: { process_data(); break; - case Precision::U8: + } + case Precision::U8: { process_data(); break; + } default: IE_THROW() << errorPrefix << " has incorrect precision on first input"; } @@ -283,13 +302,13 @@ bool MKLDNNMatMulNode::created() const { } int MKLDNNMatMulNode::getMaxBatch() { - if (!outDims.empty()) - return outDims[0][0]; + if (!outputShapes.empty()) + return outputShapes[0].getStaticDims()[0]; return 0; } InferenceEngine::Precision MKLDNNMatMulNode::getRuntimePrecision() const { - return MKLDNNExtensionUtils::getMaxPrecision(getInputPrecisions()); + return getMaxPrecision(getInputPrecisions()); } REG_MKLDNN_PRIM_FOR(MKLDNNMatMulNode, MatMul); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h index 6196665aabc..3f056cc9953 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h @@ -28,8 +28,8 @@ public: static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: - float alpha = 1.0f; - float beta = 1.0f; + float alpha = 1.f; + float beta = 0.f; bool transposeA = false; bool transposeB = false; @@ -40,9 +40,36 @@ private: std::vector bOffsets; std::vector cOffsets; - template void process_data(); + InferenceEngine::Precision runtimePrecision; + + template inline void process_data(); std::string errorPrefix; + + struct { + MKLDNNMemoryPtr src0_mem_ptr = nullptr; + MKLDNNMemoryPtr src1_mem_ptr = nullptr; + MKLDNNMemoryPtr dst_mem_ptr = nullptr; + + char transa = 'N'; + char transb = 'N'; + + int MB1 = 1; + int MB2 = 1; + + int M = 0; + int N = 0; + int K = 0; + + int lda = 0; + int ldb = 0; + int ldc = 0; + + int shift1 = 0; + int shift2 = 0; + + size_t ndims = 0; + } params; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp new file mode 100644 index 00000000000..ade776e8ce3 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp @@ -0,0 +1,383 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_matrix_nms_node.h" + +#include +#include +#include +#include +#include + +#include "ie_parallel.hpp" +#include "ngraph/opsets/opset8.hpp" +#include "ngraph_ops/nms_static_shape_ie.hpp" +#include 
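
For reference, the cached GEMM parameters above follow the usual row-major convention C[M x N] = alpha * op(A) * op(B) + beta * C, where op() optionally transposes its operand and lda/ldb/ldc are leading dimensions. A minimal standalone sketch of one inner-batch step, with naive loops standing in for the mkldnn/BLAS call (names are illustrative, not part of the patch):

// Naive reference GEMM: C[M x N] = alpha * op(A) * op(B) + beta * C, row-major.
// op(A) is A transposed when transa == 'T'; leading dimensions are chosen as in
// the node above: lda = transposeA ? M : K, ldb = transposeB ? K : N, ldc = N.
static void naive_gemm(char transa, char transb, int M, int N, int K, float alpha,
                       const float* A, int lda, const float* B, int ldb,
                       float beta, float* C, int ldc) {
    for (int m = 0; m < M; ++m) {
        for (int n = 0; n < N; ++n) {
            float acc = 0.f;
            for (int k = 0; k < K; ++k) {
                const float a = (transa == 'T') ? A[k * lda + m] : A[m * lda + k];
                const float b = (transb == 'T') ? B[n * ldb + k] : B[k * ldb + n];
                acc += a * b;
            }
            // beta == 0 (as this node now sets) means previous contents of C are ignored
            C[m * ldc + n] = alpha * acc + (beta != 0.f ? beta * C[m * ldc + n] : 0.f);
        }
    }
}

Between the two batch loops the source pointers advance by aOffsets/bOffsets, while the destination advances by shift2 = M * N per inner iteration and shift1 = MB2 * M * N per outer iteration, which is exactly what the cached params express.
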
"utils/general_utils.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; +using MatrixNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE; + +using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType; +using ngNmseDcayFunction = ngraph::op::v8::MatrixNms::DecayFunction; + +bool MKLDNNMatrixNmsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto nms = std::dynamic_pointer_cast(op); + if (!nms) { + errorMessage = "Only internal MatrixNms operation is supported"; + return false; + } + const auto& attrs = nms->get_attrs(); + const auto& sortType = attrs.sort_result_type; + if (!one_of(sortType, ngNmsSortResultType::NONE, ngNmsSortResultType::SCORE, ngNmsSortResultType::CLASSID)) { + errorMessage = "Does not support SortResultType mode: " + ngraph::as_string(sortType); + return false; + } + const auto& decayType = attrs.decay_function; + if (!one_of(decayType, ngNmseDcayFunction::LINEAR, ngNmseDcayFunction::GAUSSIAN)) { + errorMessage = "Does not support DcayFunction " + ngraph::as_string(decayType); + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNMatrixNmsNode::MKLDNNMatrixNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + errorPrefix = "MatrixNMS layer with name '" + getName() + "' "; + const auto matrix_nms = std::dynamic_pointer_cast(op); + + if (getOriginalInputsNumber() != 2) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber(); + + if (getOriginalOutputsNumber() != 3) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber(); + + const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims(); + const SizeVector& scores_dims = inputShapes[NMS_SCORES].getStaticDims(); + if (!(boxes_dims[0] == scores_dims[0] && boxes_dims[1] == scores_dims[2])) { + IE_THROW() << errorPrefix << "has incompatible 'boxes' and 'scores' input dmensions"; + } + + m_numBatches = boxes_dims[0]; + m_numBoxes = boxes_dims[1]; + if (boxes_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size(); + if (boxes_dims[2] != 4) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2]; + + m_numClasses = scores_dims[1]; + if (scores_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size(); + + if (m_numBatches != scores_dims[0]) + IE_THROW() << errorPrefix << " num_batches is different in 'boxes' and 'scores' inputs"; + if (m_numBoxes != scores_dims[2]) + IE_THROW() << errorPrefix << " num_boxes is different in 'boxes' and 'scores' inputs"; + auto& attrs = matrix_nms->get_attrs(); + if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::CLASSID) + m_sortResultType = MatrixNmsSortResultType::CLASSID; + else if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::SCORE) + m_sortResultType = MatrixNmsSortResultType::SCORE; + else if (attrs.sort_result_type == ngraph::op::util::NmsBase::SortResultType::NONE) + m_sortResultType = MatrixNmsSortResultType::NONE; + + if (attrs.decay_function == ngraph::op::v8::MatrixNms::DecayFunction::GAUSSIAN) + m_decayFunction = GAUSSIAN; + else if (attrs.decay_function == 
ngraph::op::v8::MatrixNms::DecayFunction::LINEAR) + m_decayFunction = LINEAR; + + m_sortResultAcrossBatch = attrs.sort_result_across_batch; + m_scoreThreshold = attrs.score_threshold; + m_nmsTopk = attrs.nms_top_k; + m_keepTopk = attrs.keep_top_k; + m_backgroundClass = attrs.background_class; + + m_gaussianSigma = attrs.gaussian_sigma; + m_postThreshold = attrs.post_threshold; + m_normalized = attrs.normalized; + int64_t max_output_boxes_per_class = 0; + size_t real_num_classes = m_backgroundClass == -1 ? m_numClasses : m_numClasses - 1; + if (m_nmsTopk >= 0) + max_output_boxes_per_class = std::min(m_numBoxes, static_cast(m_nmsTopk)); + else + max_output_boxes_per_class = m_numBoxes; + + m_maxBoxesPerBatch = max_output_boxes_per_class * real_num_classes; + if (m_keepTopk >= 0) + m_maxBoxesPerBatch = std::min(m_maxBoxesPerBatch, static_cast(m_keepTopk)); +} + +void MKLDNNMatrixNmsNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + m_realNumClasses = m_backgroundClass == -1 ? m_numClasses : m_numClasses - 1; + m_realNumBoxes = m_nmsTopk == -1 ? m_numBoxes : std::min(m_nmsTopk, static_cast(m_numBoxes)); + m_numPerBatch.resize(m_numBatches); + m_filteredBoxes.resize(m_numBatches * m_realNumClasses * m_realNumBoxes); + m_numPerBatchClass.resize(m_numBatches, std::vector(m_numClasses, 0)); + m_classOffset.resize(m_numClasses, 0); + + for (size_t i = 0, count = 0; i < m_numClasses; i++) { + if (i == m_backgroundClass) + continue; + m_classOffset[i] = (count++) * m_realNumBoxes; + } + + if (m_decayFunction == MatrixNmsDecayFunction::LINEAR) { + m_decay_fn = [](float iou, float max_iou, float sigma) -> float { + return (1. - iou) / (1. - max_iou + 1e-10f); + }; + } else { + m_decay_fn = [](float iou, float max_iou, float sigma) -> float { + return std::exp((max_iou * max_iou - iou * iou) * sigma); + }; + } + + const std::vector supportedFloatPrecision = {Precision::FP32}; + const std::vector supportedIntOutputPrecision = {Precision::I32, Precision::I64}; + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType); + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType); + + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_INDICES), supportedIntOutputPrecision, "selected_indices", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTED_OUTPUTS), supportedFloatPrecision, "selected_outputs", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_VALID_OUTPUTS), supportedIntOutputPrecision, "valid_outputs", outType); + + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, + impl_desc_type::ref_any); +} + +bool MKLDNNMatrixNmsNode::created() const { + return getType() == MatrixNms; +} + +namespace { + +static inline float boxArea(const float* bbox, const bool normalized) { + if (bbox[2] < bbox[0] || bbox[3] < bbox[1]) { + return static_cast(0.); + } else { + const float width = bbox[2] - bbox[0]; + const float height = bbox[3] - bbox[1]; + if (normalized) { + return width * height; + } else { + return (width + 1) * (height + 1); + } + } +} + +static inline float intersectionOverUnion(const float* bbox1, const float* bbox2, const bool normalized) { + if (bbox2[0] > bbox1[2] || bbox2[2] < bbox1[0] || bbox2[1] > bbox1[3] || bbox2[3] < bbox1[1]) { + return 
static_cast(0.); + } else { + const float xMin = std::max(bbox1[0], bbox2[0]); + const float yMin = std::max(bbox1[1], bbox2[1]); + const float xMax = std::min(bbox1[2], bbox2[2]); + const float yMax = std::min(bbox1[3], bbox2[3]); + float norm = normalized ? static_cast(0.) : static_cast(1.); + float width = xMax - xMin + norm; + float height = yMax - yMin + norm; + const float interArea = width * height; + const float bbox1Area = boxArea(bbox1, normalized); + const float bbox2Area = boxArea(bbox2, normalized); + return interArea / (bbox1Area + bbox2Area - interArea); + } +} +} // namespace + +size_t MKLDNNMatrixNmsNode::nmsMatrix(const float* boxesData, const float* scoresData, BoxInfo* filterBoxes, const int64_t batchIdx, const int64_t classIdx) { + std::vector candidateIndex(m_numBoxes); + std::iota(candidateIndex.begin(), candidateIndex.end(), 0); + auto end = std::remove_if(candidateIndex.begin(), candidateIndex.end(), [&scoresData, this](int32_t idx) { + return scoresData[idx] <= m_scoreThreshold; + }); + int64_t numDet = 0; + int64_t originalSize = std::distance(candidateIndex.begin(), end); + if (originalSize <= 0) { + return 0; + } + if (m_nmsTopk > -1 && originalSize > m_nmsTopk) { + originalSize = m_nmsTopk; + } + + std::partial_sort(candidateIndex.begin(), candidateIndex.begin() + originalSize, end, [&scoresData](int32_t a, int32_t b) { + return scoresData[a] > scoresData[b]; + }); + + std::vector iouMatrix((originalSize * (originalSize - 1)) >> 1); + std::vector iouMax(originalSize); + + iouMax[0] = 0.; + InferenceEngine::parallel_for(originalSize - 1, [&](size_t i) { + float max_iou = 0.; + size_t actual_index = i + 1; + auto idx_a = candidateIndex[actual_index]; + for (int64_t j = 0; j < actual_index; j++) { + auto idx_b = candidateIndex[j]; + auto iou = intersectionOverUnion(boxesData + idx_a * 4, boxesData + idx_b * 4, m_normalized); + max_iou = std::max(max_iou, iou); + iouMatrix[actual_index * (actual_index - 1) / 2 + j] = iou; + } + iouMax[actual_index] = max_iou; + }); + + if (scoresData[candidateIndex[0]] > m_postThreshold) { + auto box_index = candidateIndex[0]; + auto box = boxesData + box_index * 4; + filterBoxes[0].box.x1 = box[0]; + filterBoxes[0].box.y1 = box[1]; + filterBoxes[0].box.x2 = box[2]; + filterBoxes[0].box.y2 = box[3]; + filterBoxes[0].index = batchIdx * m_numBoxes + box_index; + filterBoxes[0].score = scoresData[candidateIndex[0]]; + filterBoxes[0].batchIndex = batchIdx; + filterBoxes[0].classIndex = classIdx; + numDet++; + } + + for (int64_t i = 1; i < originalSize; i++) { + float minDecay = 1.; + for (int64_t j = 0; j < i; j++) { + auto maxIou = iouMax[j]; + auto iou = iouMatrix[i * (i - 1) / 2 + j]; + auto decay = m_decay_fn(iou, maxIou, m_gaussianSigma); + minDecay = std::min(minDecay, decay); + } + auto ds = minDecay * scoresData[candidateIndex[i]]; + if (ds <= m_postThreshold) + continue; + auto boxIndex = candidateIndex[i]; + auto box = boxesData + boxIndex * 4; + filterBoxes[numDet].box.x1 = box[0]; + filterBoxes[numDet].box.y1 = box[1]; + filterBoxes[numDet].box.x2 = box[2]; + filterBoxes[numDet].box.y2 = box[3]; + filterBoxes[numDet].index = batchIdx * m_numBoxes + boxIndex; + filterBoxes[numDet].score = ds; + filterBoxes[numDet].batchIndex = batchIdx; + filterBoxes[numDet].classIndex = classIdx; + numDet++; + } + return numDet; +} + +void MKLDNNMatrixNmsNode::execute(mkldnn::stream strm) { + const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); + const float* scores = 
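
The decay lambdas above implement the Matrix NMS re-scoring rule: instead of discarding overlapping boxes outright, each candidate keeps the most pessimistic (minimum) decay induced by any higher-scoring candidate. A minimal standalone sketch of that re-scoring, assuming candidates are already sorted by descending score and pairwise IoUs are precomputed (helper names are illustrative):

#include <algorithm>
#include <cmath>
#include <vector>

enum class Decay { Linear, Gaussian };

// Same formulas as the lambdas above: linear decay (1 - iou) / (1 - max_iou + 1e-10f),
// Gaussian decay exp((max_iou^2 - iou^2) * sigma).
static float decay(float iou, float max_iou, float sigma, Decay kind) {
    return kind == Decay::Linear ? (1.f - iou) / (1.f - max_iou + 1e-10f)
                                 : std::exp((max_iou * max_iou - iou * iou) * sigma);
}

// scores: sorted descending; iou[i][j] (j < i): IoU of candidates i and j;
// max_iou[j]: largest IoU of candidate j with any earlier candidate.
static std::vector<float> matrix_nms_rescore(const std::vector<float>& scores,
                                             const std::vector<std::vector<float>>& iou,
                                             const std::vector<float>& max_iou,
                                             float sigma, Decay kind) {
    std::vector<float> rescored(scores);
    for (size_t i = 1; i < scores.size(); ++i) {
        float min_decay = 1.f;
        for (size_t j = 0; j < i; ++j)
            min_decay = std::min(min_decay, decay(iou[i][j], max_iou[j], sigma, kind));
        rescored[i] = min_decay * scores[i];
    }
    return rescored;
}

Candidates whose re-scored value falls to post_threshold or below are then dropped, which is the ds <= m_postThreshold check in nmsMatrix.
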
reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + + InferenceEngine::parallel_for2d(m_numBatches, m_numClasses, [&](size_t batchIdx, size_t classIdx) { + if (classIdx == m_backgroundClass) { + m_numPerBatchClass[batchIdx][classIdx] = 0; + return; + } + const float* boxesPtr = boxes + batchIdx * m_numBoxes * 4; + const float* scoresPtr = scores + batchIdx * (m_numClasses * m_numBoxes) + classIdx * m_numBoxes; + size_t classNumDet = 0; + size_t batchOffset = batchIdx * m_realNumClasses * m_realNumBoxes; + classNumDet = nmsMatrix(boxesPtr, scoresPtr, m_filteredBoxes.data() + batchOffset + m_classOffset[classIdx], batchIdx, classIdx); + m_numPerBatchClass[batchIdx][classIdx] = classNumDet; + }); + + InferenceEngine::parallel_for(m_numBatches, [&](size_t batchIdx) { + size_t batchOffset = batchIdx * m_realNumClasses * m_realNumBoxes; + BoxInfo* batchFilteredBox = m_filteredBoxes.data() + batchOffset; + auto& numPerClass = m_numPerBatchClass[batchIdx]; + auto numDet = std::accumulate(numPerClass.begin(), numPerClass.end(), 0); + auto start_offset = numPerClass[0]; + + for (size_t i = 1; i < numPerClass.size(); i++) { + auto offset_class = m_classOffset[i]; + for (size_t j = 0; j < numPerClass[i]; j++) { + batchFilteredBox[start_offset + j] = batchFilteredBox[offset_class + j]; + } + start_offset += numPerClass[i]; + } + auto keepNum = numDet; + if (m_keepTopk > -1) { + auto k = static_cast(m_keepTopk); + if (keepNum > k) + keepNum = k; + } + + std::partial_sort(batchFilteredBox, batchFilteredBox + keepNum, batchFilteredBox + numDet, [](const BoxInfo& lhs, const BoxInfo rhs) { + return lhs.score > rhs.score || (lhs.score == rhs.score && lhs.classIndex < rhs.classIndex) || + (lhs.score == rhs.score && lhs.classIndex == rhs.classIndex && lhs.index < rhs.index); + }); + m_numPerBatch[batchIdx] = keepNum; + }); + + auto startOffset = m_numPerBatch[0]; + for (size_t i = 1; i < m_numPerBatch.size(); i++) { + auto offset_batch = i * m_realNumClasses * m_realNumBoxes; + for (size_t j = 0; j < m_numPerBatch[i]; j++) { + m_filteredBoxes[startOffset + j] = m_filteredBoxes[offset_batch + j]; + } + startOffset += m_numPerBatch[i]; + } + + if (m_sortResultAcrossBatch) { /* sort across batch */ + if (m_sortResultType == MatrixNmsSortResultType::SCORE) { + parallel_sort(m_filteredBoxes.begin(), m_filteredBoxes.begin() + startOffset, [](const BoxInfo& l, const BoxInfo& r) { + return (l.score > r.score) || (l.score == r.score && l.batchIndex < r.batchIndex) || + (l.score == r.score && l.batchIndex == r.batchIndex && l.classIndex < r.classIndex) || + (l.score == r.score && l.batchIndex == r.batchIndex && l.classIndex == r.classIndex && l.index < r.index); + }); + } else if (m_sortResultType == MatrixNmsSortResultType::CLASSID) { + parallel_sort(m_filteredBoxes.begin(), m_filteredBoxes.begin() + startOffset, [](const BoxInfo& l, const BoxInfo& r) { + return (l.classIndex < r.classIndex) || (l.classIndex == r.classIndex && l.batchIndex < r.batchIndex) || + (l.classIndex == r.classIndex && l.batchIndex == r.batchIndex && l.score > r.score) || + (l.classIndex == r.classIndex && l.batchIndex == r.batchIndex && l.score == r.score && l.index < r.index); + }); + } + } + + float* selectedOutputs = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTED_OUTPUTS)[0]->getMemoryPtr()->GetPtr()); + int* selectedIndices = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTED_INDICES)[0]->getMemoryPtr()->GetPtr()); + int* validOutputs = 
reinterpret_cast(getChildEdgesAtPort(NMS_VALID_OUTPUTS)[0]->getMemoryPtr()->GetPtr()); + std::copy(m_numPerBatch.begin(), m_numPerBatch.end(), validOutputs); + + int64_t outputOffset = 0; + int64_t originalOffset = 0; + for (size_t i = 0; i < m_numBatches; i++) { + auto real_boxes = m_numPerBatch[i]; + for (size_t j = 0; j < real_boxes; j++) { + auto originalIndex = originalOffset + j; + selectedIndices[j + outputOffset] = static_cast(m_filteredBoxes[originalIndex].index); + auto selectedBase = selectedOutputs + (outputOffset + j) * 6; + selectedBase[0] = m_filteredBoxes[originalIndex].classIndex; + selectedBase[1] = m_filteredBoxes[originalIndex].score; + selectedBase[2] = m_filteredBoxes[originalIndex].box.x1; + selectedBase[3] = m_filteredBoxes[originalIndex].box.y1; + selectedBase[4] = m_filteredBoxes[originalIndex].box.x2; + selectedBase[5] = m_filteredBoxes[originalIndex].box.y2; + } + std::fill_n(selectedOutputs + (outputOffset + real_boxes) * 6, (m_maxBoxesPerBatch - real_boxes) * 6, -1); + std::fill_n(selectedIndices + (outputOffset + real_boxes), m_maxBoxesPerBatch - real_boxes, -1); + outputOffset += m_maxBoxesPerBatch; + originalOffset += real_boxes; + } +} + +void MKLDNNMatrixNmsNode::checkPrecision(const Precision prec, const std::vector precList, const std::string name, const std::string type) { + if (std::find(precList.begin(), precList.end(), prec) == precList.end()) + IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNMatrixNmsNode, MatrixNms); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h new file mode 100644 index 00000000000..5d85a366952 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h @@ -0,0 +1,100 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include + +namespace MKLDNNPlugin { + +enum MatrixNmsSortResultType { + CLASSID, // sort selected boxes by class id (ascending) in each batch element + SCORE, // sort selected boxes by score (descending) in each batch element + NONE // do not guarantee the order in each batch element +}; + +enum MatrixNmsDecayFunction { GAUSSIAN, LINEAR }; + +class MKLDNNMatrixNmsNode : public MKLDNNNode { +public: + MKLDNNMatrixNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache); + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + // input + static const size_t NMS_BOXES = 0; + static const size_t NMS_SCORES = 1; + + // output + static const size_t NMS_SELECTED_OUTPUTS = 0; + static const size_t NMS_SELECTED_INDICES = 1; + static const size_t NMS_VALID_OUTPUTS = 2; + + size_t m_numBatches; + size_t m_numBoxes; + size_t m_numClasses; + size_t m_maxBoxesPerBatch; + + MatrixNmsSortResultType m_sortResultType; + bool m_sortResultAcrossBatch; + float m_scoreThreshold; + int m_nmsTopk; + int m_keepTopk; + int m_backgroundClass; + MatrixNmsDecayFunction m_decayFunction; + float m_gaussianSigma; + float m_postThreshold; + bool m_normalized; + + struct Rectangle { + Rectangle(float x_left, float y_left, float 
x_right, float y_right) : x1 {x_left}, y1 {y_left}, x2 {x_right}, y2 {y_right} {} + + Rectangle() = default; + + float x1 = 0.0f; + float y1 = 0.0f; + float x2 = 0.0f; + float y2 = 0.0f; + }; + + struct BoxInfo { + BoxInfo(const Rectangle& r, int64_t idx, float sc, int64_t batch_idx, int64_t class_idx) + : box {r}, index {idx}, batchIndex {batch_idx}, classIndex {class_idx}, score {sc} {} + + BoxInfo() = default; + + Rectangle box; + int64_t index = -1; + int64_t batchIndex = -1; + int64_t classIndex = -1; + float score = 0.0f; + }; + std::string errorPrefix; + const std::string inType = "input", outType = "output"; + std::vector m_numPerBatch; + std::vector> m_numPerBatchClass; + std::vector m_filteredBoxes; + std::vector m_classOffset; + size_t m_realNumClasses; + size_t m_realNumBoxes; + float (*m_decay_fn)(float, float, float); + void checkPrecision(const InferenceEngine::Precision prec, const std::vector precList, const std::string name, + const std::string type); + + size_t nmsMatrix(const float* boxesData, const float* scoresData, BoxInfo* filterBoxes, const int64_t batchIdx, const int64_t classIdx); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp index 4aa0281a114..3218bc54eb0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp @@ -60,13 +60,14 @@ void MKLDNNMemoryOutputNode::initSupportedPrimitiveDescriptors() { InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims())); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, memory::format_tag::any); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(0)->getShape().getRank())); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } void MKLDNNMemoryOutputNode::execute(mkldnn::stream strm) { @@ -105,8 +106,7 @@ MKLDNNMemoryInputNode::MKLDNNMemoryInputNode(const std::shared_ptr void MKLDNNMemoryInputNode::createPrimitive() { MKLDNNInputNode::createPrimitive(); - auto mem_desc = getChildEdgeAt(0)->getMemoryPtr()->GetDescriptor(); - dataStore->Create(mem_desc); + dataStore->Create(getChildEdgeAt(0)->getMemory().GetDesc()); // default memory state is zero filled dataStore->FillZero(); @@ -119,7 +119,7 @@ void MKLDNNMemoryInputNode::createPrimitive() { * @param src source memory object */ inline -static void simple_copy(MKLDNNMemory& dst, const MKLDNNMemory& src) { +static void simple_copy(const MKLDNNMemory& dst, const MKLDNNMemory& src) { auto srcPtr = static_cast(src.GetPtr()); auto dstPtr = static_cast(dst.GetPtr()); auto srcSizeInByte = src.GetSize(); @@ -146,11 +146,10 @@ void MKLDNNMemoryInputNode::storeState(const MKLDNNMemory &new_state) { } void MKLDNNMemoryInputNode::execute(mkldnn::stream strm) { - auto dst_mem = getChildEdgeAt(0)->getMemory(); // TODO: Should be simple call of: // dst_mem.SetData(dataStore, false); // 
But because of performance reason we use simple manual copy - simple_copy(dst_mem, *dataStore); + simple_copy(getChildEdgeAt(0)->getMemory(), *dataStore); } MKLDNNMemoryNodeVirtualEdge::Holder* MKLDNNMemoryNodeVirtualEdge::registerInput(MKLDNNMemoryInputNode * node) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp new file mode 100644 index 00000000000..64dccbdaeab --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp @@ -0,0 +1,413 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_multiclass_nms.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ie_parallel.hpp" +#include "utils/general_utils.h" + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType; +using MulticlassNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE<ngraph::op::v8::MulticlassNms>; + +bool MKLDNNMultiClassNmsNode::isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept { + try { + const auto nms = std::dynamic_pointer_cast<MulticlassNmsIEInternal>(op); + if (!nms) { + errorMessage = "Only internal MultiClassNonMaxSuppression operation is supported"; + return false; + } + const auto& attrs = nms->get_attrs(); + const auto& sortType = attrs.sort_result_type; + if (!one_of(sortType, ngNmsSortResultType::NONE, ngNmsSortResultType::SCORE, ngNmsSortResultType::CLASSID)) { + errorMessage = "Does not support SortResultType mode: " + ngraph::as_string(sortType); + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNMultiClassNmsNode::MKLDNNMultiClassNmsNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + errorPrefix = "MultiClassNms layer with name '" + getName() + "' "; + const auto nms = std::dynamic_pointer_cast<MulticlassNmsIEInternal>(op); + + if (getOriginalInputsNumber() != 2) + IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber(); + + if (getOriginalOutputsNumber() != 3) + IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber(); + + auto& attrs = nms->get_attrs(); + sort_result_across_batch = attrs.sort_result_across_batch; + max_output_boxes_per_class = attrs.nms_top_k; + iou_threshold = attrs.iou_threshold; + score_threshold = attrs.score_threshold; + background_class = attrs.background_class; + keep_top_k = attrs.keep_top_k; + if (attrs.sort_result_type == ngNmsSortResultType::CLASSID) + sort_result_type = MulticlassNmsSortResultType::CLASSID; + else if (attrs.sort_result_type == ngNmsSortResultType::SCORE) + sort_result_type = MulticlassNmsSortResultType::SCORE; + else if (attrs.sort_result_type == ngNmsSortResultType::NONE) + sort_result_type = MulticlassNmsSortResultType::NONE; + nms_eta = attrs.nms_eta; + normalized = attrs.normalized; + + const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims(); + if (boxes_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input rank: " << boxes_dims.size(); + if (boxes_dims[2] != 4) + IE_THROW() << errorPrefix << "has unsupported 'boxes' input 3rd dimension size: " << boxes_dims[2]; + + const SizeVector& scores_dims = 
inputShapes[NMS_SCORES].getStaticDims(); + if (scores_dims.size() != 3) + IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size(); + + if (boxes_dims[0] != scores_dims[0]) + IE_THROW() << errorPrefix << " num_batches is different in 'boxes' and 'scores' inputs"; + if (boxes_dims[1] != scores_dims[2]) + IE_THROW() << errorPrefix << " num_boxes is different in 'boxes' and 'scores' inputs"; + + const SizeVector& valid_outputs_dims = outputShapes[NMS_SELECTEDNUM].getStaticDims(); + if (valid_outputs_dims.size() != 1) + IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output rank: " << valid_outputs_dims.size(); + if (valid_outputs_dims[0] != boxes_dims[0]) // valid_outputs_dims[0] != num_batches + IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output 1st dimension size: " << valid_outputs_dims[0]; +} + +void MKLDNNMultiClassNmsNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + const SizeVector& boxes_dims = inputShapes[NMS_BOXES].getStaticDims(); + num_batches = boxes_dims[0]; + num_boxes = boxes_dims[1]; + const SizeVector& scores_dims = inputShapes[NMS_SCORES].getStaticDims(); + num_classes = scores_dims[1]; + numFiltBox.resize(num_batches, std::vector(num_classes)); // batches + numBoxOffset.resize(num_batches); + + if (max_output_boxes_per_class) { + max_output_boxes_per_class = (max_output_boxes_per_class == -1) ? num_boxes : max_output_boxes_per_class; + filtBoxes.resize(max_output_boxes_per_class * num_batches * num_classes); + } + + const std::vector<Precision> supportedFloatPrecision = {Precision::FP32, Precision::BF16}; + const std::vector<Precision> supportedIntOutputPrecision = {Precision::I32, Precision::I64}; + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType); + + checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType); + + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDINDICES), supportedIntOutputPrecision, "selected_indices", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDOUTPUTS), supportedFloatPrecision, "selected_outputs", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_SELECTEDNUM), supportedIntOutputPrecision, "selected_num", outType); + + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, + impl_desc_type::ref_any); +} + +void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) { + const float* boxes = reinterpret_cast<const float*>(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); + const float* scores = reinterpret_cast<const float*>(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + + auto dims_boxes = getParentEdgeAt(NMS_BOXES)->getMemory().GetDesc().getShape().getStaticDims(); + + if (max_output_boxes_per_class == 0) + return; + + int* selected_indices = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->GetPtr()); + + float* selected_outputs = reinterpret_cast<float*>(getChildEdgesAtPort(NMS_SELECTEDOUTPUTS)[0]->getMemoryPtr()->GetPtr()); + + int* selected_num = reinterpret_cast<int*>(getChildEdgesAtPort(NMS_SELECTEDNUM)[0]->getMemoryPtr()->GetPtr()); + + auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType().getStrides(); + auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType().getStrides(); + 
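
The pointer arithmetic that follows relies on the assumed dense layouts boxes = [num_batches, num_boxes, 4] and scores = [num_batches, num_classes, num_boxes], so batch- and class-level slices are reached purely through the outer strides. A small standalone sketch of that indexing (helper names are illustrative):

#include <cstddef>

// boxes laid out as [num_batches, num_boxes, 4]: one batch is num_boxes * 4 floats,
// matching boxesStrides[0].
inline const float* boxes_for_batch(const float* boxes, size_t batch, size_t num_boxes) {
    return boxes + batch * num_boxes * 4;
}

// scores laid out as [num_batches, num_classes, num_boxes]: one (batch, class) row
// is a contiguous run of num_boxes scores, matching scoresStrides[0] and scoresStrides[1].
inline const float* scores_for_class(const float* scores, size_t batch, size_t cls,
                                     size_t num_classes, size_t num_boxes) {
    return scores + batch * num_classes * num_boxes + cls * num_boxes;
}
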
if ((nms_eta >= 0) && (nms_eta < 1)) { + nmsWithEta(boxes, scores, boxesStrides, scoresStrides); + } else { + nmsWithoutEta(boxes, scores, boxesStrides, scoresStrides); + } + + size_t startOffset = numFiltBox[0][0]; + numBoxOffset[0] = 0; + for (size_t b = 0; b < numFiltBox.size(); b++) { + size_t batchOffsetNew = 0; + size_t batchOffset = b * num_classes * max_output_boxes_per_class; + for (size_t c = (b == 0 ? 1 : 0); c < numFiltBox[b].size(); c++) { + size_t offset = batchOffset + c * max_output_boxes_per_class; + for (size_t i = 0; i < numFiltBox[b][c]; i++) { + filtBoxes[startOffset + i] = filtBoxes[offset + i]; + } + startOffset += numFiltBox[b][c]; + batchOffsetNew += numFiltBox[b][c]; + } + numBoxOffset[b] = batchOffsetNew; + if (b == 0) + numBoxOffset[b] += numFiltBox[0][0]; + } + // sort element before go through keep_top_k + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return ((l.batch_index < r.batch_index) || + ((l.batch_index == r.batch_index) && ((l.score > r.score) || ((std::fabs(l.score - r.score) < 1e-6) && l.class_index < r.class_index) || + ((std::fabs(l.score - r.score) < 1e-6) && l.class_index == r.class_index && l.box_index < r.box_index)))); + }); + + if (keep_top_k > -1) { + startOffset = 0; + size_t offset = 0; + for (size_t b = 0; b < numFiltBox.size(); b++) { + if (numBoxOffset[b] > keep_top_k) { + if (startOffset == offset) { + startOffset += keep_top_k; + offset += numBoxOffset[b]; + } else { + for (size_t i = 0; i < keep_top_k; i++) { + filtBoxes[startOffset + i] = filtBoxes[offset + i]; + } + startOffset += keep_top_k; + offset += numBoxOffset[b]; + } + } else { + if (startOffset == offset) { + startOffset += numBoxOffset[b]; + offset += numBoxOffset[b]; + } else { + for (size_t i = 0; i < numBoxOffset[b]; i++) { + filtBoxes[startOffset + i] = filtBoxes[offset + i]; + } + startOffset += numBoxOffset[b]; + offset += numBoxOffset[b]; + } + } + } + } + + if (sort_result_across_batch) { + if (sort_result_type == SCORE) { + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return (l.score > r.score) || (l.score == r.score && l.batch_index < r.batch_index) || + (l.score == r.score && l.batch_index == r.batch_index && l.class_index < r.class_index) || + (l.score == r.score && l.batch_index == r.batch_index && l.class_index == r.class_index && l.box_index < r.box_index); + }); + } else if (sort_result_type == CLASSID) { + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return (l.class_index < r.class_index) || (l.class_index == r.class_index && l.batch_index < r.batch_index) || + (l.class_index == r.class_index && l.batch_index == r.batch_index && l.score > r.score) || + (l.class_index == r.class_index && l.batch_index == r.batch_index && l.score == r.score && l.box_index < r.box_index); + }); + } + } else if (sort_result_type == CLASSID) { + parallel_sort(filtBoxes.begin(), filtBoxes.begin() + startOffset, [](const filteredBoxes& l, const filteredBoxes& r) { + return ((l.batch_index < r.batch_index) || + ((l.batch_index == r.batch_index) && + ((l.class_index < r.class_index) || ((l.class_index == r.class_index) && l.score > r.score) || + ((std::fabs(l.score - r.score) <= 1e-6) && l.class_index == r.class_index && l.box_index < r.box_index)))); + }); + } + + const size_t selectedBoxesNum = 
getChildEdgeAt(NMS_SELECTEDINDICES)->getMemory().GetDesc().getShape().getStaticDims()[0]; + const size_t validOutputs = std::min(startOffset, selectedBoxesNum); + + std::vector m_selected_num; + m_selected_num.resize(dims_boxes[0]); + + const size_t selectedBoxesNum_perBatch = selectedBoxesNum / dims_boxes[0]; + + for (size_t idx = 0lu; idx < validOutputs; idx++) { + m_selected_num[filtBoxes[idx].batch_index]++; + } + + int64_t output_offset = 0; + int64_t original_offset = 0; + for (size_t i = 0; i < dims_boxes[0]; i++) { + auto real_boxes = m_selected_num[i]; + selected_num[i] = static_cast(real_boxes); + + for (size_t j = 0; j < real_boxes; j++) { + auto original_index = original_offset + j; + selected_indices[j + output_offset] = filtBoxes[original_index].batch_index * dims_boxes[1] + filtBoxes[original_index].box_index; + auto selected_base = selected_outputs + (output_offset + j) * 6; + selected_base[0] = filtBoxes[original_index].class_index; + selected_base[1] = filtBoxes[original_index].score; + selected_base[2] = boxes[selected_indices[j + output_offset] * 4]; + selected_base[3] = boxes[selected_indices[j + output_offset] * 4 + 1]; + selected_base[4] = boxes[selected_indices[j + output_offset] * 4 + 2]; + selected_base[5] = boxes[selected_indices[j + output_offset] * 4 + 3]; + } + std::fill_n(selected_outputs + (output_offset + real_boxes) * 6, (selectedBoxesNum_perBatch - real_boxes) * 6, -1); + std::fill_n(selected_indices + (output_offset + real_boxes), selectedBoxesNum_perBatch - real_boxes, -1); + output_offset += selectedBoxesNum_perBatch; + original_offset += real_boxes; + } +} + +bool MKLDNNMultiClassNmsNode::created() const { + return getType() == MulticlassNms; +} + +float MKLDNNMultiClassNmsNode::intersectionOverUnion(const float* boxesI, const float* boxesJ, const bool normalized) { + float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ; + const float norm = static_cast(normalized == false); + + // to align with reference + yminI = boxesI[0]; + xminI = boxesI[1]; + ymaxI = boxesI[2]; + xmaxI = boxesI[3]; + yminJ = boxesJ[0]; + xminJ = boxesJ[1]; + ymaxJ = boxesJ[2]; + xmaxJ = boxesJ[3]; + + float areaI = (ymaxI - yminI + norm) * (xmaxI - xminI + norm); + float areaJ = (ymaxJ - yminJ + norm) * (xmaxJ - xminJ + norm); + if (areaI <= 0.f || areaJ <= 0.f) + return 0.f; + + float intersection_area = (std::max)((std::min)(ymaxI, ymaxJ) - (std::max)(yminI, yminJ) + norm, 0.f) * + (std::max)((std::min)(xmaxI, xmaxJ) - (std::max)(xminI, xminJ) + norm, 0.f); + return intersection_area / (areaI + areaJ - intersection_area); +} + +void MKLDNNMultiClassNmsNode::nmsWithEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) { + auto less = [](const boxInfo& l, const boxInfo& r) { + return l.score < r.score || ((l.score == r.score) && (l.idx > r.idx)); + }; + + auto func = [](float iou, float adaptive_threshold) { + return iou <= adaptive_threshold ? 
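
The output convention used above packs one detection per row of selected_outputs as [class_id, score, x1, y1, x2, y2], writes the matching flat box index into selected_indices, and pads every unused row of the statically shaped outputs with -1. A compact standalone sketch of that convention for one batch (struct and helper names are illustrative):

#include <algorithm>
#include <cstddef>
#include <vector>

struct Detection {
    int class_id;
    float score, x1, y1, x2, y2;
    int flat_box_index;  // batch_index * num_boxes + box_index
};

// Write the valid detections of one batch, then fill the remaining rows
// (up to rowsPerBatch) of both outputs with -1, as the nodes above do.
static void write_batch(const std::vector<Detection>& dets, size_t rowsPerBatch,
                        float* selected_outputs, int* selected_indices) {
    size_t row = 0;
    for (; row < dets.size() && row < rowsPerBatch; ++row) {
        float* r = selected_outputs + row * 6;
        r[0] = static_cast<float>(dets[row].class_id);
        r[1] = dets[row].score;
        r[2] = dets[row].x1;
        r[3] = dets[row].y1;
        r[4] = dets[row].x2;
        r[5] = dets[row].y2;
        selected_indices[row] = dets[row].flat_box_index;
    }
    std::fill_n(selected_outputs + row * 6, (rowsPerBatch - row) * 6, -1.f);
    std::fill_n(selected_indices + row, rowsPerBatch - row, -1);
}
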
1.0f : 0.0f; + }; + + parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) { + if (class_idx != background_class) { + std::vector fb; + const float* boxesPtr = boxes + batch_idx * boxesStrides[0]; + const float* scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1]; + + std::priority_queue, decltype(less)> sorted_boxes(less); + for (int box_idx = 0; box_idx < num_boxes; box_idx++) { + if (scoresPtr[box_idx] >= score_threshold) // algin with ref + sorted_boxes.emplace(boxInfo({scoresPtr[box_idx], box_idx, 0})); + } + fb.reserve(sorted_boxes.size()); + if (sorted_boxes.size() > 0) { + auto adaptive_threshold = iou_threshold; + int max_out_box = (max_output_boxes_per_class > sorted_boxes.size()) ? sorted_boxes.size() : max_output_boxes_per_class; + while (max_out_box && !sorted_boxes.empty()) { + boxInfo currBox = sorted_boxes.top(); + float origScore = currBox.score; + sorted_boxes.pop(); + max_out_box--; + + bool box_is_selected = true; + for (int idx = static_cast(fb.size()) - 1; idx >= currBox.suppress_begin_index; idx--) { + float iou = intersectionOverUnion(&boxesPtr[currBox.idx * 4], &boxesPtr[fb[idx].box_index * 4], normalized); + currBox.score *= func(iou, adaptive_threshold); + if (iou >= adaptive_threshold) { + box_is_selected = false; + break; + } + if (currBox.score <= score_threshold) + break; + } + + currBox.suppress_begin_index = fb.size(); + if (box_is_selected) { + if (nms_eta < 1 && adaptive_threshold > 0.5) { + adaptive_threshold *= nms_eta; + } + if (currBox.score == origScore) { + fb.push_back({currBox.score, batch_idx, class_idx, currBox.idx}); + continue; + } + if (currBox.score > score_threshold) { + sorted_boxes.push(currBox); + } + } + } + } + numFiltBox[batch_idx][class_idx] = fb.size(); + size_t offset = batch_idx * num_classes * max_output_boxes_per_class + class_idx * max_output_boxes_per_class; + for (size_t i = 0; i < fb.size(); i++) { + filtBoxes[offset + i] = fb[i]; + } + } + }); +} + +void MKLDNNMultiClassNmsNode::nmsWithoutEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) { + parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) { + if (class_idx != background_class) { + const float* boxesPtr = boxes + batch_idx * boxesStrides[0]; + const float* scoresPtr = scores + batch_idx * scoresStrides[0] + class_idx * scoresStrides[1]; + + std::vector> sorted_boxes; + for (int box_idx = 0; box_idx < num_boxes; box_idx++) { + if (scoresPtr[box_idx] >= score_threshold) // algin with ref + sorted_boxes.emplace_back(std::make_pair(scoresPtr[box_idx], box_idx)); + } + + int io_selection_size = 0; + if (sorted_boxes.size() > 0) { + parallel_sort(sorted_boxes.begin(), sorted_boxes.end(), [](const std::pair& l, const std::pair& r) { + return (l.first > r.first || ((l.first == r.first) && (l.second < r.second))); + }); + int offset = batch_idx * num_classes * max_output_boxes_per_class + class_idx * max_output_boxes_per_class; + filtBoxes[offset + 0] = filteredBoxes(sorted_boxes[0].first, batch_idx, class_idx, sorted_boxes[0].second); + io_selection_size++; + int max_out_box = (max_output_boxes_per_class > sorted_boxes.size()) ? 
sorted_boxes.size() : max_output_boxes_per_class; + for (size_t box_idx = 1; box_idx < max_out_box; box_idx++) { + bool box_is_selected = true; + for (int idx = io_selection_size - 1; idx >= 0; idx--) { + float iou = + intersectionOverUnion(&boxesPtr[sorted_boxes[box_idx].second * 4], &boxesPtr[filtBoxes[offset + idx].box_index * 4], normalized); + if (iou >= iou_threshold) { + box_is_selected = false; + break; + } + } + + if (box_is_selected) { + filtBoxes[offset + io_selection_size] = filteredBoxes(sorted_boxes[box_idx].first, batch_idx, class_idx, sorted_boxes[box_idx].second); + io_selection_size++; + } + } + } + numFiltBox[batch_idx][class_idx] = io_selection_size; + } + }); +} + +void MKLDNNMultiClassNmsNode::checkPrecision(const Precision prec, const std::vector precList, const std::string name, const std::string type) { + if (std::find(precList.begin(), precList.end(), prec) == precList.end()) + IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; +} + +REG_MKLDNN_PRIM_FOR(MKLDNNMultiClassNmsNode, MulticlassNms) \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp new file mode 100644 index 00000000000..0627f72cea0 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp @@ -0,0 +1,93 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include + +namespace MKLDNNPlugin { + +enum MulticlassNmsSortResultType { + CLASSID, // sort selected boxes by class id (ascending) in each batch element + SCORE, // sort selected boxes by score (descending) in each batch element + NONE // do not guarantee the order in each batch element +}; + +class MKLDNNMultiClassNmsNode : public MKLDNNNode { +public: + MKLDNNMultiClassNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache); + + void getSupportedDescriptors() override {}; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override {}; + void execute(mkldnn::stream strm) override; + bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + +private: + // input (port Num) + const size_t NMS_BOXES = 0; + const size_t NMS_SCORES = 1; + + // output (port Num) + const size_t NMS_SELECTEDOUTPUTS = 0; + const size_t NMS_SELECTEDINDICES = 1; + const size_t NMS_SELECTEDNUM = 2; + + bool sort_result_across_batch = false; + MulticlassNmsSortResultType sort_result_type = NONE; + + size_t num_batches; + size_t num_boxes; + size_t num_classes; + + int max_output_boxes_per_class = 0; + float iou_threshold = 0.0f; + float score_threshold = 0.0f; + + int32_t background_class = 0; + int32_t keep_top_k = 0; + float nms_eta = 0.0f; + bool normalized = true; + + std::string errorPrefix; + + std::vector> numFiltBox; + std::vector numBoxOffset; + const std::string inType = "input", outType = "output"; + + struct filteredBoxes { + float score; + int batch_index; + int class_index; + int box_index; + filteredBoxes() = default; + filteredBoxes(float _score, int _batch_index, int _class_index, int _box_index) + : score(_score), batch_index(_batch_index), class_index(_class_index), box_index(_box_index) {} + }; + + struct boxInfo { + float score; + int idx; + int suppress_begin_index; + }; + + std::vector filtBoxes; + + void checkPrecision(const 
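
nmsWithEta above is ordinary greedy NMS with one twist: once the adaptive IoU threshold exceeds 0.5, it is multiplied by nms_eta after every kept box, so suppression becomes progressively stricter. A minimal standalone sketch of that loop, assuming candidates are pre-sorted by descending score and iou() returns a value in [0, 1] (names are illustrative):

#include <cstddef>
#include <functional>
#include <vector>

struct Box { float x1, y1, x2, y2; };

static std::vector<size_t> greedy_nms_with_eta(const std::vector<Box>& sorted_boxes,
                                               float iou_threshold, float eta,
                                               const std::function<float(const Box&, const Box&)>& iou) {
    std::vector<size_t> kept;
    float adaptive = iou_threshold;
    for (size_t i = 0; i < sorted_boxes.size(); ++i) {
        bool selected = true;
        for (size_t k : kept) {
            if (iou(sorted_boxes[i], sorted_boxes[k]) >= adaptive) {
                selected = false;  // suppressed by an already-kept box
                break;
            }
        }
        if (selected) {
            kept.push_back(i);
            if (eta < 1.f && adaptive > 0.5f)
                adaptive *= eta;  // tighten the threshold for later candidates
        }
    }
    return kept;
}

With eta == 1 this degenerates to the fixed-threshold path implemented by nmsWithoutEta.
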
InferenceEngine::Precision prec, const std::vector precList, const std::string name, + const std::string type); + + float intersectionOverUnion(const float* boxesI, const float* boxesJ, const bool normalized); + + void nmsWithEta(const float* boxes, const float* scores, const InferenceEngine::SizeVector& boxesStrides, const InferenceEngine::SizeVector& scoresStrides); + + void nmsWithoutEta(const float* boxes, const float* scores, const InferenceEngine::SizeVector& boxesStrides, + const InferenceEngine::SizeVector& scoresStrides); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index baff79e5d75..f476aa8dec5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -733,7 +733,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { !getParentEdgeAt(0)->getParent()->isConstant(); const size_t inputsNum = getParentEdges().size(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(inputsNum); config.outConfs.resize(1); @@ -742,17 +742,15 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].inPlace = -1; config.outConfs[0].inPlace = canBeInplace ? 0 : -1; if (inputsNum == 2) { - const auto dims = getParentEdgeAt(1)->getDims().ToSizeVector(); - config.inConfs[1].desc = TensorDesc(Precision::I32, - dims, - TensorDesc::getLayoutByDims(dims)); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::s32, + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(1)->getShape().getRank())); config.inConfs[1].constant = true; } auto pushDesc = [&](memory::format_tag format, impl_desc_type impl_type) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, format); - config.outConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), outputDataType, format); - supportedPrimitiveDescriptors.push_back({config, impl_type, format}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); + supportedPrimitiveDescriptors.push_back({config, impl_type}); }; impl_desc_type impl_type; @@ -768,22 +766,22 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { if (mayiuse(cpu::x64::sse41)) { // nspc - if (getParentEdgeAt(0)->getDims().ndims() == 4) { + if (getParentEdgeAt(0)->getShape().getRank() == 4) { pushDesc(memory::format_tag::nhwc, impl_type); - } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { + } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { pushDesc(memory::format_tag::ndhwc, impl_type); } // blk if (impl_desc_type::jit_avx512 == impl_type) { - if (getParentEdgeAt(0)->getDims().ndims() == 4) { + if (getParentEdgeAt(0)->getShape().getRank() == 4) { pushDesc(memory::format_tag::nChw16c, impl_type); - } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { + } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { pushDesc(memory::format_tag::nCdhw16c, impl_type); } } else if (impl_desc_type::jit_avx2 == impl_type || impl_desc_type::jit_sse42 == impl_type) { - if (getParentEdgeAt(0)->getDims().ndims() == 4) { + if (getParentEdgeAt(0)->getShape().getRank() == 4) { 
pushDesc(memory::format_tag::nChw8c, impl_type); - } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { + } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { pushDesc(memory::format_tag::nCdhw8c, impl_type); } } @@ -792,7 +790,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { // planar if (canBeInplace) config.inConfs[0].inPlace = 0; - pushDesc(MKLDNNMemory::GetPlainFormat(getChildEdgeAt(0)->getDims()), impl_type); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), impl_type); } void MKLDNNMVNNode::createPrimitive() { @@ -805,15 +803,15 @@ void MKLDNNMVNNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - const SizeVector in_dims = getParentEdgeAt(0)->getDims().ToSizeVector(); + const SizeVector in_dims = getParentEdgeAt(0)->getShape().getStaticDims(); transformTo5DCase(in_dims); auto selectedPD = getSelectedPrimitiveDescriptor(); auto jcp = jit_mvn_config_params(); - jcp.src_prc = selectedPD->getConfig().inConfs[0].desc.getPrecision(); - jcp.dst_prc = selectedPD->getConfig().outConfs[0].desc.getPrecision(); + jcp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision(); + jcp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision(); jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.src_prc)); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.dst_prc)); - jcp.planar_layout = MKLDNNMemory::GetPlainLayout(getChildEdgeAt(0)->getDims()) == selectedPD->getConfig().inConfs[0].desc.getLayout(); + jcp.planar_layout = selectedPD->getConfig().inConfs[0].desc->hasLayoutType(LayoutType::ncsp); jcp.normalize_variance = normalizeVariance_; jcp.across_channels = acrossChannels_; int N = 0; @@ -913,13 +911,12 @@ void MKLDNNMVNNode::execute(mkldnn::stream strm) { uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); uint8_t *src_data = reinterpret_cast(srcMemPtr->GetPtr()); - auto dim = getParentEdgeAt(0)->getDesc().getDims(); + auto dim = getParentEdgeAt(0)->getShape().getStaticDims(); if (mayiuse(cpu::x64::sse41)) { if (!mvn_mean_kernel || (normalizeVariance_ && !mvn_variance_kernel) || !mvn_kernel) { IE_THROW() << "MVN layer with name '" << getName() << "' doesn't create kernel to execute on sse41 above platform."; } - Layout layout = getParentEdgeAt(0)->getDesc().getLayout(); - if (layout == C || layout == NC || layout == CHW || layout == NCHW || layout == NCDHW) { + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { mvn_pln(src_data, dst_data, dim); } else { mvn_blk(src_data, dst_data, dim); @@ -1173,10 +1170,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si size_t N = 1; size_t C = 1; size_t D = 1; size_t H = 1; size_t W = 1; std::tie(N, C, D, H, W) = shape5D; - bool is_nhwc = false; - Layout layout = getParentEdgeAt(0)->getDesc().getLayout(); - if (layout == NHWC || layout == NDHWC) - is_nhwc = true; + bool is_nhwc = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc); size_t CB = div_up(C, blk_size); @@ -1407,7 +1401,7 @@ bool MKLDNNMVNNode::canFuse(const MKLDNNNodePtr& node) const { } // limit post ops to unary when shape transformed on channel // 1D only fused with unary - int inputRank = getParentEdgeAt(0)->getDims().ndims(); + int inputRank = getParentEdgeAt(0)->getShape().getRank(); bool unaryEltwise = one_of(node->getAlgorithm(), 
EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp index 093127eada5..a6c0bc07b28 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp @@ -114,18 +114,18 @@ void MKLDNNNonMaxSuppressionNode::initSupportedPrimitiveDescriptors() { checkOutput(outputShape_SELECTEDINDICES, supportedIntOutputPrecision, "selected_indices", NMS_SELECTEDINDICES); checkOutput(outputShape_SELECTEDSCORES, supportedFloatPrecision, "selected_scores", NMS_SELECTEDSCORES); - std::vector inDataConf; + std::vector inDataConf; inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) { Precision inPrecision = i == NMS_MAXOUTPUTBOXESPERCLASS ? Precision::I32 : Precision::FP32; - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, inPrecision); + inDataConf.emplace_back(LayoutType::ncsp, inPrecision); } - std::vector outDataConf; + std::vector outDataConf; outDataConf.reserve(getOriginalOutputsNumber()); for (int i = 0; i < getOriginalOutputsNumber(); ++i) { Precision outPrecision = i == NMS_SELECTEDSCORES ? Precision::FP32 : Precision::I32; - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp, outPrecision); + outDataConf.emplace_back(LayoutType::ncsp, outPrecision); } addSupportedPrimDesc(inDataConf, outDataConf, impl_desc_type::ref_any); @@ -135,24 +135,24 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { const float *boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); const float *scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); - max_output_boxes_per_class = outDims.size() > NMS_SELECTEDSCORES ? 0 : num_boxes; - if (inDims.size() > NMS_MAXOUTPUTBOXESPERCLASS) { + max_output_boxes_per_class = outputShapes.size() > NMS_SELECTEDSCORES ? 0 : num_boxes; + if (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS) { max_output_boxes_per_class = reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->GetPtr())[0]; } if (max_output_boxes_per_class == 0) return; - iou_threshold = outDims.size() > NMS_SELECTEDSCORES ? 0.0f : 1.0f; - if (inDims.size() > NMS_IOUTHRESHOLD) + iou_threshold = outputShapes.size() > NMS_SELECTEDSCORES ? 
0.0f : 1.0f; + if (inputShapes.size() > NMS_IOUTHRESHOLD) iou_threshold = reinterpret_cast(getParentEdgeAt(NMS_IOUTHRESHOLD)->getMemoryPtr()->GetPtr())[0]; score_threshold = 0.0f; - if (inDims.size() > NMS_SCORETHRESHOLD) + if (inputShapes.size() > NMS_SCORETHRESHOLD) score_threshold = reinterpret_cast(getParentEdgeAt(NMS_SCORETHRESHOLD)->getMemoryPtr()->GetPtr())[0]; soft_nms_sigma = 0.0f; - if (inDims.size() > NMS_SOFTNMSSIGMA) + if (inputShapes.size() > NMS_SOFTNMSSIGMA) soft_nms_sigma = reinterpret_cast(getParentEdgeAt(NMS_SOFTNMSSIGMA)->getMemoryPtr()->GetPtr())[0]; scale = 0.0f; if (soft_nms_sigma > 0.0) { @@ -162,15 +162,15 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { int *selected_indices = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->GetPtr()); float *selected_scores = nullptr; - if (outDims.size() > NMS_SELECTEDSCORES) + if (outputShapes.size() > NMS_SELECTEDSCORES) selected_scores = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDSCORES)[0]->getMemoryPtr()->GetPtr()); int *valid_outputs = nullptr; - if (outDims.size() > NMS_VALIDOUTPUTS) + if (outputShapes.size() > NMS_VALIDOUTPUTS) valid_outputs = reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->GetPtr()); - auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getDesc().getBlockingDesc().getStrides(); - auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getDesc().getBlockingDesc().getStrides(); + auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType().getStrides(); + auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType().getStrides(); std::vector filtBoxes(max_output_boxes_per_class * num_batches * num_classes); @@ -205,10 +205,10 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { }); } - const size_t selectedBoxesNum = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getDims()[0]; + const size_t selectedBoxesNum = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getShape().getStaticDims()[0]; const size_t validOutputs = std::min(filtBoxes.size(), selectedBoxesNum); - int selectedIndicesStride = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getDesc().getBlockingDesc().getStrides()[0]; + int selectedIndicesStride = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemory().GetDescWithType().getStrides()[0]; int *selectedIndicesPtr = selected_indices; float *selectedScoresPtr = selected_scores; @@ -218,7 +218,7 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { selectedIndicesPtr[1] = filtBoxes[idx].class_index; selectedIndicesPtr[2] = filtBoxes[idx].box_index; selectedIndicesPtr += selectedIndicesStride; - if (outDims.size() > NMS_SELECTEDSCORES) { + if (outputShapes.size() > NMS_SELECTEDSCORES) { selectedScoresPtr[0] = static_cast(filtBoxes[idx].batch_index); selectedScoresPtr[1] = static_cast(filtBoxes[idx].class_index); selectedScoresPtr[2] = static_cast(filtBoxes[idx].score); @@ -226,10 +226,10 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { } } std::fill(selectedIndicesPtr, selectedIndicesPtr + (selectedBoxesNum - idx) * selectedIndicesStride, -1); - if (outDims.size() > NMS_SELECTEDSCORES) { + if (outputShapes.size() > NMS_SELECTEDSCORES) { std::fill(selectedScoresPtr, selectedScoresPtr + (selectedBoxesNum - idx) * selectedIndicesStride, -1.f); } - if (outDims.size() > NMS_VALIDOUTPUTS) + if (outputShapes.size() > NMS_VALIDOUTPUTS) *valid_outputs = static_cast(validOutputs); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp index 2da3ae8f330..10b1be0dac5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp @@ -682,8 +682,10 @@ bool MKLDNNNormalizeL2Node::isSupportedOperation(const std::shared_ptrgetDims().ndims() > 4 || getParentEdgeAt(0)->getDims().ndims() < 2) { + if (getParentEdgeAt(0)->getShape().getRank() > 4 || getParentEdgeAt(0)->getShape().getRank() < 2) { IE_THROW() << errorPrefix << "has invalid input shape. Normalize supports from 2D to 4D blobs."; } } @@ -757,21 +759,22 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() { bool canBeInplace = src_data_size == dst_data_size && getParentEdgeAt(DATA)->getParent()->getChildEdges().size() == 1; - LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(2); config.outConfs.resize(1); config.outConfs[0].inPlace = canBeInplace ? 0 : -1; auto pushDesc = [&](memory::format_tag format) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA)->getDims(), inputDataType, format); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(AXES)->getDims(), memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA)->getDims(), outputDataType, format); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, format}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA)->getShape().getStaticDims(), inputDataType, format); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXES)->getShape().getStaticDims(), memory::data_type::s32, + memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA)->getShape().getStaticDims(), outputDataType, format); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); }; // only plain layout support when w/o sse42 - if (getParentEdgeAt(DATA)->getDims().ndims() == 4 && !cornerCase) { + if (getParentEdgeAt(DATA)->getShape().getRank() == 4 && !cornerCase) { if (mayiuse(cpu::x64::sse41)) { pushDesc(memory::format_tag::nhwc); if (mayiuse(cpu::x64::avx512_common)) { @@ -783,7 +786,7 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() { } if (canBeInplace) config.inConfs[0].inPlace = 0; - pushDesc(MKLDNNMemory::GetPlainFormat(getChildEdgeAt(DATA)->getDims())); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(DATA)->getShape().getRank())); } bool MKLDNNNormalizeL2Node::canFuse(const MKLDNNNodePtr& node) const { @@ -824,22 +827,23 @@ void MKLDNNNormalizeL2Node::createPrimitive() { if (!cornerCase) { auto selectedPD = getSelectedPrimitiveDescriptor(); - jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[0].desc.getPrecision()); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc.getPrecision()); + jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[0].desc->getPrecision()); + jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc->getPrecision()); jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.is_nchw = jcp.is_nhwc = jcp.is_blk = false; - if (getParentEdgeAt(0)->getMemory().GetDesc().isPlainFormat()) { + if 
(getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { jcp.is_nchw = true; - } else if (getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat()) { + } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c)) { jcp.is_blk = true; } else { jcp.is_nhwc = true; } jcp.across_spatial = across_spatial; - auto dims = getParentEdgeAt(0)->getDesc().getDims(); + auto dims = getParentEdgeAt(0)->getShape().getStaticDims(); size_t dims_size = dims.size(); jcp.n = (dims_size > 0) ? dims[0] : 1lu; jcp.c = (dims_size > 1) ? dims[1] : 1lu; @@ -905,7 +909,7 @@ void MKLDNNNormalizeL2Node::execute(mkldnn::stream strm) { const uint8_t *src_ptr = reinterpret_cast(srcMemPtr->GetPtr()); uint8_t *dst_ptr = reinterpret_cast(dstMemPtr->GetPtr()); - auto dims = getParentEdgeAt(DATA)->getDesc().getDims(); + auto dims = getParentEdgeAt(DATA)->getShape().getStaticDims(); NormalizeContext ctx = { *this, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp index 8f164c33c18..350e86e556e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp @@ -10,7 +10,7 @@ #include "utils/bfloat16.hpp" #include #include "mkldnn_one_hot_node.h" -#include +#include #include #include "common/cpu_memcpy.h" @@ -89,11 +89,11 @@ void MKLDNNOneHotNode::initSupportedPrimitiveDescriptors() { } output_precision = getOriginalOutputPrecisionAtPort(0); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, input_precision}, - {TensorDescCreatorTypes::ncsp, input_precision}, - {TensorDescCreatorTypes::ncsp, output_precision}, - {TensorDescCreatorTypes::ncsp, output_precision}}, - {{TensorDescCreatorTypes::ncsp, output_precision}}, + addSupportedPrimDesc({{LayoutType::ncsp, input_precision}, + {LayoutType::ncsp, input_precision}, + {LayoutType::ncsp, output_precision}, + {LayoutType::ncsp, output_precision}}, + {{LayoutType::ncsp, output_precision}}, impl_desc_type::ref_any); } @@ -125,13 +125,13 @@ void MKLDNNOneHotNode::one_hot(size_t prefix_size, size_t suffix_size) { void MKLDNNOneHotNode::execute(mkldnn::stream strm) { std::size_t prefix_size = 1; - auto input_dims = getParentEdgeAt(0)->getDesc().getDims(); + auto input_dims = getParentEdgeAt(0)->getShape().getStaticDims(); std::size_t actual_axis = (axis == -1) ? 
src_dims.size() : axis; for (size_t i = 0; i < actual_axis; ++i) prefix_size *= input_dims[i]; - std::size_t suffix_size = getParentEdgeAt(0)->getBlob()->size() / prefix_size; + std::size_t suffix_size = getParentEdgeAt(0)->getShape().getElementsCount() / prefix_size; OneHotContext ctx = {this, prefix_size, suffix_size}; OV_SWITCH(MKLDNNPlugin, OneHotExecute, ctx, output_precision.size(), diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp index b4ef82481ca..584eb4bce79 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp @@ -92,8 +92,8 @@ void MKLDNNPadNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "Incorrect number of output edges"; - const SizeVector srcDims = getParentEdgeAt(DATA_ID)->getDims().ToSizeVector(); - const SizeVector dstDims = getChildEdgeAt(DATA_ID)->getDims().ToSizeVector(); + const SizeVector srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + const SizeVector dstDims = getChildEdgeAt(DATA_ID)->getShape().getStaticDims(); if (srcDims.size() != dstDims.size() || padsBegin.size() != srcDims.size() || padsEnd.size() != srcDims.size()) IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions!"; @@ -122,22 +122,26 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { precision = precision.is_float() ? InferenceEngine::Precision::FP32 : InferenceEngine::Precision::I32; auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto srcDims = getParentEdgeAt(DATA_ID)->getDims(); - int numOfDims = srcDims.ToSizeVector().size(); + auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + int numOfDims = srcDims.size(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(isPadValueSpecified ? 
4 : 3); config.outConfs.resize(1); auto pushSupportedPrimitiveDescriptor = [&](memory::format_tag memoryFormat) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA_ID)->getDims(), dataType, memoryFormat); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(PADS_BEGIN_ID)->getDims(), memory::data_type::s32, memory::format_tag::x); - config.inConfs[2].desc = MKLDNNMemoryDesc(getParentEdgeAt(PADS_END_ID)->getDims(), memory::data_type::s32, memory::format_tag::x); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, + memoryFormat); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PADS_BEGIN_ID)->getShape().getStaticDims(), + memory::data_type::s32, memory::format_tag::x); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PADS_END_ID)->getShape().getStaticDims(), + memory::data_type::s32, memory::format_tag::x); if (isPadValueSpecified) - config.inConfs[3].desc = MKLDNNMemoryDesc(getParentEdgeAt(PAD_VALUE_ID)->getDims(), memory::data_type::f32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(DATA_ID)->getDims(), dataType, memoryFormat); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, memoryFormat}); + config.inConfs[3].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PAD_VALUE_ID)->getShape().getStaticDims(), + memory::data_type::f32, memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, memoryFormat); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref}); }; if (numOfDims == 4) @@ -145,7 +149,7 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { else if (numOfDims == 5) pushSupportedPrimitiveDescriptor(mkldnn::memory::format_tag::ndhwc); - pushSupportedPrimitiveDescriptor(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims())); + pushSupportedPrimitiveDescriptor(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(0)->getShape().getRank())); auto canUseBlocked = [=](const size_t blockSize) { return (padMode == CONSTANT && padsBegin[1] % blockSize == 0 && padsEnd[1] % blockSize == 0) || @@ -175,10 +179,11 @@ void MKLDNNPadNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor for Pad " << getName() << " is not set."; - params.sizeData = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.sizeData = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); - params.srcDims = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); - params.dstDims = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); + const auto inBlkDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + params.srcDims = inBlkDesc.getBlockDims(); + params.dstDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); size_t nDims = params.srcDims.size(); params.srcStrides.resize(nDims, 1); @@ -188,13 +193,14 @@ void MKLDNNPadNode::createPrimitive() { params.dstStrides[i] = params.dstStrides[i + 1] * params.dstDims[i + 1]; } - if (getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat()) { + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c)) { padsBegin[1] /= params.srcDims[params.srcDims.size() - 1]; padsEnd[1] /= 
params.srcDims[params.srcDims.size() - 1]; padsBegin.push_back(0); padsEnd.push_back(0); } else { - auto order = getParentEdgeAt(0)->getDesc().getBlockingDesc().getOrder(); + auto order = inBlkDesc.getOrder(); std::vector newPadsBegin(padsBegin.size(), 0), newPadsEnd(padsEnd.size(), 0); for (size_t i = 0; i < padsBegin.size(); ++i) { newPadsBegin[i] = padsBegin[order[i]]; @@ -304,7 +310,7 @@ void MKLDNNPadNode::padConstant() { auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU Pad node with name '" << getName() << "' doesn't have primitive descriptors."; - InferenceEngine::Precision precision = selectedPrimitiveDescriptor->getConfig().inConfs[0].desc.getPrecision(); + InferenceEngine::Precision precision = selectedPrimitiveDescriptor->getConfig().inConfs[0].desc->getPrecision(); OV_SWITCH(MKLDNNPlugin, PadConstantEmitter, this, precision, OV_CASE(InferenceEngine::Precision::FP32, float), OV_CASE(InferenceEngine::Precision::I32, int32_t), diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp index c7a007d0c6f..5d6e900d75d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp @@ -13,6 +13,7 @@ #include #include #include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -60,18 +61,18 @@ MKLDNNPoolingNode::MKLDNNPoolingNode(const std::shared_ptr& op, co } } -std::vector MKLDNNPoolingNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const { - if (dims.ndims() == 0) +std::vector MKLDNNPoolingNode::getAvailableFormatsForDims(const Shape &dims) const { + if (dims.getRank() == 0) return {memory::format_tag::x}; - else if (dims.ndims() == 1) + else if (dims.getRank() == 1) return {memory::format_tag::x}; - else if (dims.ndims() == 2) + else if (dims.getRank() == 2) return {memory::format_tag::nc}; - else if (dims.ndims() == 3) + else if (dims.getRank() == 3) return {memory::format_tag::tnc, memory::format_tag::ntc}; - else if (dims.ndims() == 4) + else if (dims.getRank() == 4) return {memory::format_tag::nChw8c, memory::format_tag::nChw16c, memory::format_tag::nhwc, memory::format_tag::nchw}; - else if (dims.ndims() == 5) + else if (dims.getRank() == 5) return {memory::format_tag::nCdhw8c, memory::format_tag::nCdhw16c, memory::format_tag::ndhwc, memory::format_tag::ncdhw}; return {memory::format_tag::any}; } @@ -112,15 +113,17 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { effective_pad_begin = data_pad_begin; effective_pad_end.resize(data_pad_end.size()); - auto parentDims = getParentEdgeAt(0)->getDims(); - auto childDims = getChildEdgeAt(0)->getDims(); - if ((parentDims.ndims() < 4) || (parentDims.ndims() > 5)) + auto parentDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto childDims = getChildEdgeAt(0)->getShape().getStaticDims(); + const size_t inputRank = getParentEdgeAt(0)->getShape().getRank(); + + if ((inputRank < 4) || (inputRank > 5)) IE_THROW() << "Pooling layer. Unsupported mode. 
Only 4D and 5D blobs are supported as input."; for (int i = 0; i < effective_pad_end.size(); i++) { int krn = kernel[i]; - int src = getParentEdgeAt(0)->getDims()[2 + i]; - int dst = getChildEdgeAt(0)->getDims()[2 + i]; + int src = getParentEdgeAt(0)->getShape().getStaticDims()[2 + i]; + int dst = getChildEdgeAt(0)->getShape().getStaticDims()[2 + i]; int calc_dst = (src - krn + data_pad_begin[i]) / stride[i] + 1; effective_pad_end[i] = (dst - calc_dst) * stride[i]; @@ -130,24 +133,28 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { if (outputDataType == memory::data_type::bf16) outputDataType = memory::data_type::f32; // i8 layers supports only ndhwc and nhwc layouts - MKLDNNMemoryDesc in_candidate{parentDims, inputDataType, parentDims.ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc}; - MKLDNNMemoryDesc out_candidate{childDims, outputDataType, parentDims.ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc}; - createDescriptor({ in_candidate }, { out_candidate }); - } else if ((parentDims.ndims() == 4 || parentDims.ndims() == 5) && parentDims[1] == 1) { + const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, inputRank == 5 ? + memory::format_tag::ndhwc : memory::format_tag::nhwc); + const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, inputRank == 5 ? + memory::format_tag::ndhwc : memory::format_tag::nhwc); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + } else if ((inputRank == 4 || inputRank == 5) && parentDims[1] == 1) { // WA. We should force planar layout since it provides better performance - MKLDNNMemoryDesc in_candidate{parentDims, inputDataType, parentDims.ndims() == 5 ? memory::format_tag::ncdhw : memory::format_tag::nchw}; - MKLDNNMemoryDesc out_candidate{childDims, outputDataType, parentDims.ndims() == 5 ? memory::format_tag::ncdhw : memory::format_tag::nchw}; - createDescriptor({ in_candidate }, { out_candidate }); + const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, inputRank == 5 ? + memory::format_tag::ncdhw : memory::format_tag::nchw); + const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, inputRank == 5 ? 
+ memory::format_tag::ncdhw : memory::format_tag::nchw); + createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else { if (inputDataType != memory::data_type::bf16) { inputDataType = memory::data_type::f32; outputDataType = memory::data_type::f32; } // It doesn't support any format - for (auto format : getAvailableFormatsForDims(parentDims)) { - MKLDNNMemoryDesc in_candidate{parentDims, inputDataType, format}; - MKLDNNMemoryDesc out_candidate{childDims, outputDataType, format}; - createDescriptor({in_candidate}, {out_candidate}); + for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { + const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, format); + const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, format); + createDescriptor({in_candidate.get()}, {out_candidate.get()}); } } } @@ -172,10 +179,10 @@ bool MKLDNNPoolingNode::created() const { return getType() == Pooling; } -void MKLDNNPoolingNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - MKLDNNMemoryDesc in_candidate(inputDesc[0]); - MKLDNNMemoryDesc out_candidate(outputDesc[0]); +void MKLDNNPoolingNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); + MKLDNNMemoryDesc out_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); mkldnn::algorithm alg; if (algorithm == PoolingAvg) { @@ -240,21 +247,23 @@ void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() { for (auto& desc : descs) { auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr); while (static_cast(itpd)) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getSrcMemDesc(itpd, i)); + dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getSrcMemDesc(itpd, i)); + dataConfig.desc = getSrcMemDesc(itpd, i); config.inConfs.push_back(dataConfig); } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = canBeInPlace() ? 
0 : -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getDstMemDesc(itpd, i)); + dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getDstMemDesc(itpd, i)); + dataConfig.desc = getDstMemDesc(itpd, i); config.outConfs.push_back(dataConfig); } impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); @@ -266,23 +275,23 @@ void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNPoolingNode::initDescriptor(const InferenceEngine::LayerConfig &config) { +void MKLDNNPoolingNode::initDescriptor(const NodeConfig& config) { auto* selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) { return; } - std::vector inDescs; + std::vector inDescs; for (const auto& inConf : config.inConfs) - inDescs.push_back(inConf.desc); - std::vector outDescs; + inDescs.push_back(inConf.desc.get()); + std::vector outDescs; for (const auto& outConf : config.outConfs) - outDescs.push_back(outConf.desc); + outDescs.push_back(outConf.desc.get()); createDescriptor({inDescs}, {outDescs}); mkldnn::primitive_attr attr; setPostOps(attr); - InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig(); + NodeConfig rightConfig = selectedPD->getConfig(); size_t selected_count = 0; for (size_t j = 0; j < descs.size(); j++) { const auto &desc = descs[j]; @@ -291,10 +300,10 @@ void MKLDNNPoolingNode::initDescriptor(const InferenceEngine::LayerConfig &confi itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr); while (itpd) { - InferenceEngine::LayerConfig cfg; + NodeConfig cfg; cfg.dynBatchSupport = true; for (size_t i = 0; i < descInputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = canBeInPlace() ? 0 : -1; dataConfig.constant = false; dataConfig.desc = getSrcMemDesc(itpd, i); @@ -302,7 +311,7 @@ void MKLDNNPoolingNode::initDescriptor(const InferenceEngine::LayerConfig &confi } for (size_t i = 0; i < descOutputNumbers(desc); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; dataConfig.desc = getDstMemDesc(itpd, i); @@ -332,20 +341,18 @@ void MKLDNNPoolingNode::initDescriptor(const InferenceEngine::LayerConfig &confi return; for (size_t i = 0; i < selectedConfig.inConfs.size(); i++) { - if (selectedConfig.inConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY && - !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.inConfs[i].desc, config.inConfs[i].desc)) + if (!selectedConfig.inConfs[i].desc->isCompatible(*config.inConfs[i].desc)) IE_THROW() << "Incorrect descriptor for node: " << getName(); } for (size_t i = 0; i < selectedConfig.outConfs.size(); i++) { - if (selectedConfig.outConfs[i].desc.getLayout() != InferenceEngine::Layout::ANY && - !MKLDNNExtensionUtils::initTensorsAreEqual(selectedConfig.outConfs[i].desc, config.outConfs[i].desc)) + if (!selectedConfig.outConfs[i].desc->isCompatible(*config.outConfs[i].desc)) IE_THROW() << "Incorrect descriptor for node: " << getName(); } rightConfig = config; } - selectedPD->getConfig() = rightConfig; + selectedPD->setConfig(rightConfig); } void MKLDNNPoolingNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h index 1f6acf58b78..a594e774e47 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h @@ 
-16,12 +16,12 @@ class MKLDNNPoolingNode : public MKLDNNNode { public: MKLDNNPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; - std::vector getAvailableFormatsForDims(const MKLDNNDims &dims) const override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; + std::vector getAvailableFormatsForDims(const Shape &dims) const override; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; - void initDescriptor(const InferenceEngine::LayerConfig &config) override; + void initDescriptor(const NodeConfig& config) override; void createPrimitive() override; bool created() const override; bool canBeInPlace() const override { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp index 584960373ae..e7421d82f12 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" #include #include @@ -141,17 +140,17 @@ void MKLDNNProposalNode::initSupportedPrimitiveDescriptors() { return; if (store_prob) { - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } else { - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } } @@ -166,8 +165,8 @@ void MKLDNNProposalNode::execute(mkldnn::stream strm) { if (store_prob) outProbData = reinterpret_cast (getChildEdgesAtPort(PROBABILITIES_OUT_IDX)[0]->getMemoryPtr()->GetPtr()); - auto inProbDims = getParentEdgeAt(0)->getDims().ToSizeVector(); - const size_t imgInfoSize = getParentEdgeAt(2)->getDims()[0]; + auto inProbDims = getParentEdgeAt(0)->getShape().getStaticDims(); + const size_t imgInfoSize = getParentEdgeAt(2)->getShape().getStaticDims()[0]; // input image height & width const float imgHeight = imgInfoData[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp index 393ef27921a..e56d6d2c245 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp @@ -12,7 +12,7 @@ #include #include "mkldnn_psroi_pooling_node.h" #include -#include +#include using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -139,27 +139,27 @@ void MKLDNNPSROIPoolingNode::initSupportedPrimitiveDescriptors() { 
auto dataPrecision = getOriginalInputPrecisionAtPort(0) == Precision::BF16 ? Precision::BF16 : Precision::FP32; if (getAlgorithm() == Algorithm::PSROIPoolingAverage || getAlgorithm() == Algorithm::PSROIPoolingBilinear) { - std::vector> dataFomats{ - {TensorDescCreatorTypes::ncsp, TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::nspc, TensorDescCreatorTypes::nspc}, - {TensorDescCreatorTypes::nCsp16c, TensorDescCreatorTypes::nCsp16c}, - {TensorDescCreatorTypes::nCsp8c, TensorDescCreatorTypes::nCsp8c} + std::vector> dataFomats{ + {LayoutType::ncsp, LayoutType::ncsp}, + {LayoutType::nspc, LayoutType::nspc}, + {LayoutType::nCsp16c, LayoutType::nCsp16c}, + {LayoutType::nCsp8c, LayoutType::nCsp8c} }; for (const auto &df : dataFomats) { - addSupportedPrimDesc({{df.first, dataPrecision}, {TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{df.first, dataPrecision}, {LayoutType::ncsp, Precision::FP32}}, {{df.second, dataPrecision}}, impl_type); } } else if (getAlgorithm() == Algorithm::PSROIPoolingBilinearDeformable && noTrans) { - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision}, {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, dataPrecision}, {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, dataPrecision}}, impl_type); } else if (getAlgorithm() == Algorithm::PSROIPoolingBilinearDeformable) { - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, dataPrecision}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, dataPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, dataPrecision}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, dataPrecision}}, impl_type); } } @@ -182,19 +182,18 @@ inline float bilinearInterp(const inputType* data, const float x, const float y, return value; } -void MKLDNNPSROIPoolingNode::unpackParams(const TensorDesc& srcDesc, const TensorDesc& dstDesc, +void MKLDNNPSROIPoolingNode::unpackParams(const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc, int& hInputStride, int& wInputStride, int& hOutputStride, int& wOutputStride, - Layout& inFmt, Layout& outFmt, int& inBlockSize, int& outBlockSize, int& outBlockCount, unsigned long& inputChannelsPadding, unsigned long& outputChannelsPadding) { - inFmt = srcDesc.getLayout(); - outFmt = dstDesc.getLayout(); - int expectedInBlockDimsSize = (inFmt == Layout::BLOCKED ? 5 : 4); - int expectedOutBlockDimsSize = (outFmt == Layout::BLOCKED ? 5 : 4); - auto inBlkDims = srcDesc.getBlockingDesc().getBlockDims(); - auto outBlkDims = dstDesc.getBlockingDesc().getBlockDims(); + const bool inpIsBlk = srcDesc.hasLayoutType(LayoutType::nCsp16c) || srcDesc.hasLayoutType(LayoutType::nCsp8c); + const bool outIsBlk = dstDesc.hasLayoutType(LayoutType::nCsp16c) || dstDesc.hasLayoutType(LayoutType::nCsp8c); + int expectedInBlockDimsSize = (inpIsBlk ? 5 : 4); + int expectedOutBlockDimsSize = (outIsBlk ? 
5 : 4); + auto inBlkDims = srcDesc.getBlockDims(); + auto outBlkDims = dstDesc.getBlockDims(); if (inBlkDims.size() != expectedInBlockDimsSize) IE_THROW() << errorPrefix << " has unexpected size of blocking dims in input (given " << inBlkDims.size() << ", expected " << expectedInBlockDimsSize << ")"; @@ -202,15 +201,15 @@ void MKLDNNPSROIPoolingNode::unpackParams(const TensorDesc& srcDesc, const Tenso IE_THROW() << errorPrefix << " has unexpected size of blocking dims in output (given " << outBlkDims.size() << ", expected " << expectedOutBlockDimsSize << ")"; - inBlockSize = (inFmt == Layout::BLOCKED ? srcDesc.getBlockingDesc().getBlockDims()[4] : 1); - outBlockSize = (outFmt == Layout::BLOCKED ? dstDesc.getBlockingDesc().getBlockDims()[4] : 1); - inputChannelsPadding = srcDesc.getBlockingDesc().getBlockDims()[1] * inBlockSize; - outputChannelsPadding = dstDesc.getBlockingDesc().getBlockDims()[1] * outBlockSize; + inBlockSize = (inpIsBlk ? srcDesc.getBlockDims()[4] : 1); + outBlockSize = (outIsBlk ? dstDesc.getBlockDims()[4] : 1); + inputChannelsPadding = srcDesc.getBlockDims()[1] * inBlockSize; + outputChannelsPadding = dstDesc.getBlockDims()[1] * outBlockSize; outBlockCount = outputChannelsPadding / outBlockSize; int hOutStrIndex = 0, wOutStrIndex = 0, hInStrIndex = 0, wInStrIndex = 0; - const auto& outOrder = dstDesc.getBlockingDesc().getOrder(); - const auto& inOrder = srcDesc.getBlockingDesc().getOrder(); + const auto& outOrder = dstDesc.getOrder(); + const auto& inOrder = srcDesc.getOrder(); for (int i = 0; i < outOrder.size(); i++) { if (outOrder[i] == 2) hOutStrIndex = i; if (outOrder[i] == 3) wOutStrIndex = i; @@ -219,21 +218,20 @@ void MKLDNNPSROIPoolingNode::unpackParams(const TensorDesc& srcDesc, const Tenso if (inOrder[i] == 2) hInStrIndex = i; if (inOrder[i] == 3) wInStrIndex = i; } - hInputStride = srcDesc.getBlockingDesc().getStrides()[hInStrIndex]; - wInputStride = srcDesc.getBlockingDesc().getStrides()[wInStrIndex]; - hOutputStride = dstDesc.getBlockingDesc().getStrides()[hOutStrIndex]; - wOutputStride = dstDesc.getBlockingDesc().getStrides()[wOutStrIndex]; + hInputStride = srcDesc.getStrides()[hInStrIndex]; + wInputStride = srcDesc.getStrides()[wInStrIndex]; + hOutputStride = dstDesc.getStrides()[hOutStrIndex]; + wOutputStride = dstDesc.getStrides()[wOutStrIndex]; } template void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois, const int n, const int roiBatchInd, - const TensorDesc& srcDesc, const TensorDesc& dstDesc) { - Layout inFmt, outFmt; + const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc) { int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; unsigned long inputChannelsPadding, outputChannelsPadding; unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride, - inFmt, outFmt, inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); + inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); const float roiStartW = static_cast(round(bottomRois[1])) * spatialScale; const float roiStartH = static_cast(round(bottomRois[2])) * spatialScale; const float roiEndW = static_cast(round(bottomRois[3] + 1.0f)) * spatialScale; @@ -273,7 +271,7 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType dstData[dstIndex] = outSum / binArea; } }; - if (inFmt == Layout::NHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc)) { 
parallel_for2d(nh, nw, [&](int h, int w) { const int binOffsetOutput = n * nc * nh * nw; const int binOffsetInput = roiBatchInd * channels * height * width; @@ -282,10 +280,10 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType avgPsroi(c, h, w, 0, 0, binOffsetInput + gc, binOffsetOutput + c); } }); - } else if (inFmt == Layout::NCHW) { + } else if (srcDesc.hasLayoutType(LayoutType::ncsp)) { parallel_for3d(nc, nh, nw, [&](int c, int h, int w) { const int gc = (c * groupSize + h) * groupSize + w; - const int outputBlockResidual = (outFmt == Layout::NCHW ? 0 : c % inBlockSize); + const int outputBlockResidual = (dstDesc.hasLayoutType(LayoutType::ncsp) ? 0 : c % inBlockSize); const int outputBlockIdx = (c / outBlockSize) * outBlockSize; const int binOffsetInput = (roiBatchInd * inputChannelsPadding + gc) * height * width; const int binOffsetOutput = (n * outputChannelsPadding + outputBlockIdx) * nh * nw; @@ -297,8 +295,8 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType int cEnd = (blkIdx == outBlockCount - 1 ? nc : cStart + outBlockSize); for (int c = cStart; c < cEnd; c++) { const int gc = (c * groupSize + h) * groupSize + w; - const int inputBlockResidual = (inFmt == Layout::NCHW ? 0 : gc % inBlockSize); - const int outputBlockResidual = (outFmt == Layout::NCHW ? 0 : c % inBlockSize); + const int inputBlockResidual = (srcDesc.hasLayoutType(LayoutType::ncsp) ? 0 : gc % inBlockSize); + const int outputBlockResidual = (dstDesc.hasLayoutType(LayoutType::ncsp) ? 0 : c % inBlockSize); const int inputBlockIdx = (gc / inBlockSize) * inBlockSize; const int outputBlockIdx = (c / outBlockSize) * outBlockSize; const int binOffsetInput = (roiBatchInd * inputChannelsPadding + inputBlockIdx) * height * width; @@ -312,12 +310,11 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType template void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputType *dstData, const float *bottomRois, const int currentRoi, const int roiBatchInd, - const TensorDesc& srcDesc, const TensorDesc& dstDesc) { - Layout inFmt, outFmt; + const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc) { int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; unsigned long inputChannelsPadding, outputChannelsPadding; unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride, - inFmt, outFmt, inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); + inBlockSize, outBlockSize, outBlockCount, inputChannelsPadding, outputChannelsPadding); const float roiStartW = bottomRois[1] * spatialScale; const float roiStartH = bottomRois[2] * spatialScale; const float roiEndW = bottomRois[3] * spatialScale; @@ -340,13 +337,14 @@ void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputTyp const float inY = nh > 1 ? (h * heightScale + boxYmin * (height - 1)) : 0.5f * (boxYmin + boxYmax) * (height - 1); for (size_t binX = 0; binX < spatialBinsX; binX++) { size_t gc = c + (binY * spatialBinsX + binX) * nc; - if (inFmt == Layout::NHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc)) { binOffIn = roiBatchInd * channels * height * width + gc; inBlkRes = 0; } else { // nchw, nChw16c, nChw8c const int inputBlockIdx = (gc / inBlockSize) * inBlockSize; binOffIn = (roiBatchInd * inputChannelsPadding + inputBlockIdx) * height * width; - inBlkRes = (inFmt == Layout::BLOCKED ? 
gc % inBlockSize : 0); + inBlkRes = ((srcDesc.hasLayoutType(LayoutType::nCsp16c) || srcDesc.hasLayoutType(LayoutType::nCsp8c)) + ? gc % inBlockSize : 0); } const auto *bottomData = srcData + binOffIn; @@ -386,14 +384,14 @@ void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputTyp dstData[dstIndex] = accum; }; - if (inFmt == Layout::NHWC) { + if (srcDesc.hasLayoutType(LayoutType::nspc)) { const int binOffsetOutput = currentRoi * nc * nh * nw; parallel_for2d(nh, nw, [&](int h, int w) { for (int c = 0; c < nc; c++) { bilinearPsroi(c, h, w, 0, binOffsetOutput + c); } }); - } else if (inFmt == Layout::NCHW) { + } else if (srcDesc.hasLayoutType(LayoutType::ncsp)) { parallel_for3d(nc, nh, nw, [&](int c, int h, int w) { bilinearPsroi(c, h, w, 0, (currentRoi * outputChannelsPadding + c) * binCount); }); @@ -404,7 +402,8 @@ void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputTyp for (int c = cStart; c < cEnd; c++) { const int outputBlockIdx = (c / inBlockSize) * inBlockSize; const int binOffsetOutput = (currentRoi * outputChannelsPadding + outputBlockIdx) * binCount; - const int outputBlockResidual = (inFmt == Layout::BLOCKED ? c % inBlockSize : 0); + const int outputBlockResidual = ((srcDesc.hasLayoutType(LayoutType::nCsp16c) || srcDesc.hasLayoutType(LayoutType::nCsp8c)) + ? c % inBlockSize : 0); bilinearPsroi(c, h, w, outputBlockResidual, binOffsetOutput); } }); @@ -480,8 +479,8 @@ void MKLDNNPSROIPoolingNode::executeSpecified() { const auto *bottomRoisBeginning = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto srcDesc = getParentEdgeAt(0)->getDesc(); - auto dstDesc = getChildEdgeAt(0)->getDesc(); + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); int realRois = 0; for (; realRois < nn; realRois++) { @@ -497,7 +496,7 @@ void MKLDNNPSROIPoolingNode::executeSpecified() { int channelsEachClass = outputDim; if (!noTrans) { bottomTrans = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); - numClasses = static_cast(getParentEdgeAt(2)->getDesc().getDims()[1]) / 2; + numClasses = static_cast(getParentEdgeAt(2)->getShape().getStaticDims()[1]) / 2; channelsEachClass /= numClasses; } @@ -534,8 +533,8 @@ struct MKLDNNPSROIPoolingNode::PSROIPoolingExecute { }; void MKLDNNPSROIPoolingNode::execute(mkldnn::stream strm) { - auto inputPrec = getParentEdgesAtPort(0)[0]->getDesc().getPrecision(); - auto outputPrec = getChildEdgesAtPort(0)[0]->getDesc().getPrecision(); + auto inputPrec = getParentEdgesAtPort(0)[0]->getMemory().GetDesc().getPrecision(); + auto outputPrec = getChildEdgesAtPort(0)[0]->getMemory().GetDesc().getPrecision(); if (!((inputPrec == Precision::BF16 && outputPrec == Precision::BF16) || (inputPrec == Precision::FP32 && outputPrec == Precision::FP32))) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h index 24e015d3a6d..45f275fe1dd 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h @@ -50,10 +50,9 @@ private: std::string errorPrefix; - void unpackParams(const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc, + void unpackParams(const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc, int& hInputStride, 
int& wInputStride, int& hOutputStride, int& wOutputStride, - InferenceEngine::Layout& inFmt, InferenceEngine::Layout& outFmt, int& inBlockSize, int& outBlockSize, int& outBlockCount, unsigned long& inputChannelsPadding, unsigned long& outputChannelsPadding); @@ -61,12 +60,12 @@ private: template <typename inputType, typename outputType> void executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois, const int n, const int roiBatchInd, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc); template <typename inputType, typename outputType> void executeBilinear(const inputType *srcData, outputType *dstData, const float *bottomRois, const int currentRoi, const int roiBatchInd, - const InferenceEngine::TensorDesc& srcDesc, const InferenceEngine::TensorDesc& dstDesc); + const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc); template <typename inputType, typename outputType> void executeBilinearDeformable(const inputType *srcData, outputType *dstData, const float *bottomRois, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp index 33e625fce6f..86818d36140 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -65,8 +63,8 @@ void MKLDNNRangeNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector<DataConfigurator> inDataConf; - std::vector<DataConfigurator> outDataConf; + std::vector<PortConfigurator> inDataConf; + std::vector<PortConfigurator> outDataConf; if (!(getOriginalInputPrecisionAtPort(RANGE_START) == Precision::I32 && getOriginalInputPrecisionAtPort(RANGE_LIMIT) == Precision::I32 && @@ -78,23 +76,23 @@ getOriginalOutputPrecisionAtPort(0) == Precision::FP32)) { inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); outDataConf.reserve(1); - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + outDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, outDataConf, impl_desc_type::ref_any); } else { inDataConf.reserve(getOriginalInputsNumber()); for (int i = 0; i < getOriginalInputsNumber(); ++i) - inDataConf.emplace_back(TensorDescCreatorTypes::ncsp); + inDataConf.emplace_back(LayoutType::ncsp); outDataConf.reserve(1); - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp); + outDataConf.emplace_back(LayoutType::ncsp); addSupportedPrimDesc(inDataConf, outDataConf, impl_desc_type::ref_any); } } void MKLDNNRangeNode::execute(mkldnn::stream strm) { StatusCode retcode = OK; - switch (getParentEdgeAt(0)->getDesc().getPrecision()) { + switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision()) { case Precision::FP32: retcode = rangeKernel<float>(); break; @@ -112,7 +110,7 @@ void MKLDNNRangeNode::execute(mkldnn::stream strm) { template <typename data_t> InferenceEngine::StatusCode MKLDNNRangeNode::rangeKernel() noexcept { - size_t dst_size = (getChildEdgesAtPort(0)[0]->getDims())[0]; + size_t dst_size = (getChildEdgesAtPort(0)[0]->getShape().getStaticDims())[0]; data_t* dst_data = reinterpret_cast<data_t *>(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); data_t start =
reinterpret_cast(getParentEdgeAt(RANGE_START)->getMemoryPtr()->GetPtr())[0]; data_t limit = reinterpret_cast(getParentEdgeAt(RANGE_LIMIT)->getMemoryPtr()->GetPtr())[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp index 7828bc55f27..c76156ec4ae 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp @@ -1405,18 +1405,18 @@ void MKLDNNReduceNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " gets incorrect number of output edges!"; - if (getParentEdgeAt(REDUCE_INDEXES)->getDims().ndims() != 1) { + if (getParentEdgeAt(REDUCE_INDEXES)->getShape().getRank() != 1) { IE_THROW() << errorPrefix << " gets incorrect index vector dimension! Index vector should be 1 dimension."; } if (keep_dims) { - if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() != getChildEdgeAt(0)->getDims().ndims()) + if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() != getChildEdgeAt(0)->getShape().getRank()) IE_THROW() << errorPrefix << " gets incorrect number of input/output dimensions!"; } else { // In fact, after the Reduce operation, the shape must be a scalar if the previous one was 1d. // But for now, 0d tensor (scalar) is emulated as 1d tensor. Skip checking in such cases. - bool is_emulated_0d_as_1d = getParentEdgeAt(REDUCE_DATA)->getDims().ndims() == 1 && getChildEdgeAt(0)->getDims().ndims() == 1; - if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() <= getChildEdgeAt(0)->getDims().ndims() && !is_emulated_0d_as_1d) + bool is_emulated_0d_as_1d = getParentEdgeAt(REDUCE_DATA)->getShape().getRank() == 1 && getChildEdgeAt(0)->getShape().getRank() == 1; + if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() <= getChildEdgeAt(0)->getShape().getRank() && !is_emulated_0d_as_1d) IE_THROW() << errorPrefix << "gets incorrect number of input/output dimensions!"; } } @@ -1436,7 +1436,7 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { Precision inputPrecision = getOriginalInputPrecisionAtPort(REDUCE_DATA); Precision outputPrecision = getOriginalOutputPrecisionAtPort(0); - jit_mode = (mayiuse(cpu::x64::sse41)) && getParentEdgeAt(REDUCE_DATA)->getDims().ndims() <= 5 && + jit_mode = (mayiuse(cpu::x64::sse41)) && getParentEdgeAt(REDUCE_DATA)->getShape().getRank() <= 5 && std::find(std::begin(supportedPrecisions), std::end(supportedPrecisions), inputPrecision) != std::end(supportedPrecisions) && std::find(std::begin(supportedPrecisions), std::end(supportedPrecisions), outputPrecision) != std::end(supportedPrecisions); @@ -1461,7 +1461,7 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { src_data_size = MKLDNNExtensionUtils::sizeOfDataType(inputDataType); dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(outputDataType); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(2); config.outConfs.resize(1); @@ -1474,10 +1474,12 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { auto pushDesc = [&](memory::format_tag inFormat, memory::format_tag outFormat, memory::data_type inDataType, memory::data_type outDataType, impl_desc_type impl_type) { - config.inConfs[REDUCE_DATA].desc = MKLDNNMemoryDesc(getParentEdgeAt(REDUCE_DATA)->getDims(), inDataType, inFormat); - config.inConfs[REDUCE_INDEXES].desc = MKLDNNMemoryDesc(getParentEdgeAt(REDUCE_INDEXES)->getDims(), memory::data_type::s32, memory::format_tag::x); 
- config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outDataType, outFormat); - supportedPrimitiveDescriptors.push_back({config, impl_type, outFormat}); + config.inConfs[REDUCE_DATA].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims(), + inDataType, inFormat); + config.inConfs[REDUCE_INDEXES].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(REDUCE_INDEXES)->getShape().getStaticDims(), + memory::data_type::s32, memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outDataType, outFormat); + supportedPrimitiveDescriptors.push_back({config, impl_type}); }; if (jit_mode) { @@ -1488,16 +1490,16 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::jit_avx2; } - pushDesc(MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(REDUCE_DATA)->getDims().ndims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getChildEdgeAt(0)->getDims().ndims())), inputDataType, outputDataType, impl_type); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(REDUCE_DATA)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), inputDataType, outputDataType, impl_type); if (keep_dims) { - if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() == 4 && getParentEdgeAt(REDUCE_DATA)->getDims().ToSizeVector()[1] > 1) { + if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() == 4 && getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims()[1] > 1) { if (mayiuse(cpu::x64::avx512_common)) { pushDesc(memory::format_tag::nChw16c, memory::format_tag::nChw16c, inputDataType, outputDataType, impl_type); } else if (mayiuse(cpu::x64::avx2) || mayiuse(cpu::x64::sse41)) { pushDesc(memory::format_tag::nChw8c, memory::format_tag::nChw8c, inputDataType, outputDataType, impl_type); } - } else if (getParentEdgeAt(REDUCE_DATA)->getDims().ndims() == 5 && getParentEdgeAt(REDUCE_DATA)->getDims().ToSizeVector()[1] > 1) { + } else if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() == 5 && getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims()[1] > 1) { if (mayiuse(cpu::x64::avx512_common)) { pushDesc(memory::format_tag::nCdhw16c, memory::format_tag::nCdhw16c, inputDataType, outputDataType, impl_type); } else if (mayiuse(cpu::x64::avx2) || mayiuse(cpu::x64::sse41)) { @@ -1506,8 +1508,8 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { } } } else { - pushDesc(MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(REDUCE_DATA)->getDims().ndims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getChildEdgeAt(0)->getDims().ndims())), + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(REDUCE_DATA)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), memory::data_type::f32, memory::data_type::f32, impl_desc_type::ref); } } @@ -1524,11 +1526,11 @@ void MKLDNNReduceNode::createPrimitive() { IE_THROW() << errorPrefix << " has nullable preferable primitive descriptor"; auto selectedPD = getSelectedPrimitiveDescriptor(); - planar_layout = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDesc().isPlainFormat(); + planar_layout = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp); auto jcp = jit_reduce_config_params(); - jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[REDUCE_DATA].desc.getPrecision()); - jcp.dst_dt = 
MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc.getPrecision()); + jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[REDUCE_DATA].desc->getPrecision()); + jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].desc->getPrecision()); jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.planar_layout = planar_layout; @@ -1564,8 +1566,8 @@ void MKLDNNReduceNode::execute(mkldnn::stream strm) { const auto idx_data = reinterpret_cast<const int32_t *>(srcIndexesMemPtr->GetData()); size_t dst_size = dstMemPtr->GetSize(); - src_dims = getParentEdgeAt(REDUCE_DATA)->getDesc().getDims(); - src_strides = getParentEdgeAt(REDUCE_DATA)->getDesc().getBlockingDesc().getStrides(); + src_dims = getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims(); + src_strides = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDescWithType<BlockedMemoryDesc>().getStrides(); dims_size = src_dims.size(); calc_process_dst_dims(idx_data); @@ -1930,9 +1932,9 @@ inline void MKLDNNReduceNode::init_dst_data(uint8_t *out_ptr, size_t dst_size) { inline void MKLDNNReduceNode::calc_process_dst_dims(const int32_t *idx_data) { SizeVector out_dims; - SizeVector dst_dims = getChildEdgeAt(0)->getDesc().getDims(); + SizeVector dst_dims = getChildEdgeAt(0)->getShape().getStaticDims(); std::set axes; - for (size_t i = 0; i < getParentEdgeAt(REDUCE_INDEXES)->getDims()[0]; i++) { + for (size_t i = 0; i < getParentEdgeAt(REDUCE_INDEXES)->getShape().getStaticDims()[0]; i++) { int32_t axis = idx_data[i]; if (axis < 0) axis += src_dims.size(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp index e4950732ab6..f7ddad8b679 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp @@ -6,6 +6,7 @@ #include #include #include +#include "common/blocked_desc_creator.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -28,45 +29,32 @@ void MKLDNNReferenceNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::LayerConfig config; - for (size_t i = 0; i < inDims.size(); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - - dataConfig.desc = MKLDNNMemoryDesc(inDims[i], - MKLDNNExtensionUtils::IEPrecisionToDataType(convertPrecision(ngraphOp->get_input_element_type(i))), - MKLDNNMemory::GetPlainFormat(inDims[i])); - - config.inConfs.push_back(dataConfig); + std::vector<PortConfigurator> inputConfigurators; + inputConfigurators.reserve(inputShapes.size()); + for (size_t i = 0; i < inputShapes.size(); i++) { + inputConfigurators.emplace_back(LayoutType::ncsp, convertPrecision(ngraphOp->get_input_element_type(i)), inputShapes[i]); } - for (size_t i = 0; i < outDims.size(); i++) { - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - - dataConfig.desc = MKLDNNMemoryDesc(outDims[i], - MKLDNNExtensionUtils::IEPrecisionToDataType(convertPrecision(ngraphOp->get_output_element_type(i))), - MKLDNNMemory::GetPlainFormat(outDims[i])); - - config.outConfs.push_back(dataConfig); + std::vector<PortConfigurator> outputConfigurators; + outputConfigurators.reserve(inputShapes.size()); + for (size_t i = 0; i < outputShapes.size(); i++) { + outputConfigurators.emplace_back(LayoutType::ncsp,
convertPrecision(ngraphOp->get_output_element_type(i)), outputShapes[i]); } - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, memory::format_tag::undef}); + addSupportedPrimDesc(inputConfigurators, outputConfigurators, impl_desc_type::ref); } void MKLDNNReferenceNode::createPrimitive() {} void MKLDNNReferenceNode::execute(mkldnn::stream strm) { ngraph::HostTensorVector inputs; - for (size_t i = 0; i < inDims.size(); i++) { + for (size_t i = 0; i < inputShapes.size(); i++) { void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().GetPtr(); inputs.push_back(std::make_shared(ngraphOp->get_input_element_type(i), ngraphOp->get_input_shape(i), srcDataPtr)); } ngraph::HostTensorVector outputs; - for (size_t i = 0; i < outDims.size(); i++) { + for (size_t i = 0; i < outputShapes.size(); i++) { void *dstDataPtr = getChildEdgesAtPort(i)[0]->getMemory().GetPtr(); outputs.push_back(std::make_shared(ngraphOp->get_output_element_type(i), ngraphOp->get_output_shape(i), dstDataPtr)); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp index af1159bb07d..c140baa88c5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp @@ -8,7 +8,7 @@ #include #include "ie_parallel.hpp" #include "mkldnn_region_yolo_node.h" -#include +#include #include #include "common/cpu_convert.h" #include @@ -291,8 +291,8 @@ void MKLDNNRegionYoloNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, input_prec}}, - {{TensorDescCreatorTypes::ncsp, output_prec}}, + addSupportedPrimDesc({{LayoutType::ncsp, input_prec}}, + {{LayoutType::ncsp, output_prec}}, impl_type); } @@ -367,13 +367,10 @@ inline void MKLDNNRegionYoloNode::calculate_logistic(size_t start_index, int cou } void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { - auto inputDesc = getParentEdgeAt(0)->getDesc(); - auto outputDesc = getChildEdgeAt(0)->getDesc(); - - size_t B = (inputDesc.getDims().size() > 0) ? inputDesc.getDims()[0] : 1; - size_t IC = (inputDesc.getDims().size() > 1) ? inputDesc.getDims()[1] : 1; - size_t IH = (inputDesc.getDims().size() > 2) ? inputDesc.getDims()[2] : 1; - size_t IW = (inputDesc.getDims().size() > 3) ? inputDesc.getDims()[3] : 1; + size_t B = (getParentEdgeAt(0)->getShape().getRank() > 0) ? getParentEdgeAt(0)->getShape().getStaticDims()[0] : 1; + size_t IC = (getParentEdgeAt(0)->getShape().getRank() > 1) ? getParentEdgeAt(0)->getShape().getStaticDims()[1] : 1; + size_t IH = (getParentEdgeAt(0)->getShape().getRank() > 2) ? getParentEdgeAt(0)->getShape().getStaticDims()[2] : 1; + size_t IW = (getParentEdgeAt(0)->getShape().getRank() > 3) ? 
getParentEdgeAt(0)->getShape().getStaticDims()[3] : 1; size_t mask_size = mask.size(); int end_index = 0; @@ -400,7 +397,8 @@ void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - cpu_convert(src_data, dst_data, inputDesc.getPrecision(), outputDesc.getPrecision(), output_size); + cpu_convert(src_data, dst_data, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + getChildEdgeAt(0)->getMemory().GetDesc().getPrecision(), output_size); for (int b = 0; b < B; b++) { for (int n = 0; n < num_; n++) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp index c318468ef1e..99bd606a9a0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp @@ -25,10 +25,6 @@ MKLDNNReorderNode::MKLDNNReorderNode(const std::string& name, const mkldnn::engi MKLDNNNode("Reorder", name, eng, w_cache) { } void MKLDNNReorderNode::getSupportedDescriptors() { - if (outDims.empty() && output.getLayout() != InferenceEngine::Layout::ANY) - outDims.push_back(MKLDNNDims(output.getDims())); - if (inDims.empty() && input.getLayout() != InferenceEngine::Layout::ANY) - inDims.push_back(MKLDNNDims(input.getDims())); if (getParentEdges().size() != 1) IE_THROW() << "Incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) @@ -39,13 +35,10 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto inputDataType = MKLDNNMemoryDesc(input).getDataType(); - auto outputDataType = MKLDNNMemoryDesc(output).getDataType(); - auto parent = getParentEdgeAt(0)->getParent(); auto child = getChildEdgeAt(0)->getChild(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.outConfs.resize(1); @@ -57,19 +50,18 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].inPlace = 0; config.outConfs[0].inPlace = 0; } - if (input.getLayout() != InferenceEngine::Layout::ANY && output.getLayout() != InferenceEngine::Layout::ANY) { - config.inConfs[0].desc = input; - config.outConfs[0].desc = output; + if (input && output) { + config.inConfs[0].desc = input->clone(); + config.outConfs[0].desc = output->clone(); } else if (parent->getSelectedPrimitiveDescriptor() != nullptr && child->getSelectedPrimitiveDescriptor() != nullptr) { - config.inConfs[0].desc = parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc; - config.outConfs[0].desc = child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc; + config.inConfs[0].desc = parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc->clone(); + config.outConfs[0].desc = child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->clone(); } else { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::any); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::any); + IE_THROW() << "Cannot initialize supported PDs for Reorder node with name `" << getName() << "`"; } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::reorder, MKLDNNMemory::Convert(config.outConfs[0].desc.getLayout())); + 
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::reorder); } void MKLDNNReorderNode::createPrimitive() { @@ -82,21 +74,23 @@ void MKLDNNReorderNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; + auto inDims = getParentEdgeAt(0)->getShape().getStaticDims(); + if (!isOptimized) { - if (MKLDNNPlugin::one_of(getParentEdgeAt(0)->getDims().ndims(), 4, 5) && - getParentEdgeAt(0)->getDims()[1] <= 64 && - getParentEdgeAt(0)->getDims()[1] >= 16 && - (getParentEdgeAt(0)->getMemory().GetElementsCount() / getParentEdgeAt(0)->getDims()[1]) >= 128 && - getParentEdgeAt(0)->getMemory().GetDesc().isTailCFormat() && - getChildEdgeAt(0)->getMemory().GetDesc().isPlainFormat() && - getParentEdgeAt(0)->getMemory().GetDesc().getDataType() == memory::data_type::f32 && - getChildEdgeAt(0)->getMemory().GetDesc().getDataType() == memory::data_type::f32) { + if (MKLDNNPlugin::one_of(inDims.size(), 4, 5) && + inDims[1] <= 64 && + inDims[1] >= 16 && + (getParentEdgeAt(0)->getMemory().GetElementsCount() / inDims[1]) >= 128 && + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc) && + getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && + getParentEdgeAt(0)->getMemory().GetDesc().getPrecision() == Precision::FP32 && + getChildEdgeAt(0)->getMemory().GetDesc().getPrecision() == Precision::FP32) { // oneDNN JIT reorder shows bad perf for nspc to ncsp reorder case so we fallback on simple c++ implementation canUseOptimizedNspc2Ncsp = true; } else if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) && - MKLDNNPlugin::one_of(getParentEdgeAt(0)->getDims().ndims(), 4, 5) && - getParentEdgeAt(0)->getMemory().GetDesc().isPlainFormat() && - getChildEdgeAt(0)->getMemory().GetDesc().isTailCFormat() && + MKLDNNPlugin::one_of(inDims.size(), 4, 5) && + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && + getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc) && getParentEdgeAt(0)->getMemory().GetDataType() == getChildEdgeAt(0)->getMemory().GetDataType() && MKLDNNExtensionUtils::sizeOfDataType(getParentEdgeAt(0)->getMemory().GetDataType()) == 1) { // oneDNN doesn't provide JIT reorder impl for non-avx2 targets so we fallback on simple c++ implementation which shows better perf @@ -110,29 +104,12 @@ void MKLDNNReorderNode::createPrimitive() { void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr) { src_blocked = std::make_shared(getEngine()); - src_blocked->Create(srcDesc, srcPtr, false); + src_blocked->Create(MKLDNNMemoryDesc(srcDesc), srcPtr, false); dst_blocked = std::make_shared(getEngine()); - dst_blocked->Create(dstDesc, dstPtr, false); + dst_blocked->Create(MKLDNNMemoryDesc(dstDesc), dstPtr, false); mkldnn::primitive_attr attr; - - if (_scales) { - std::vector scales; - - float* scaleData = static_cast(_scales->buffer()); - - for (size_t i = 0; i < _scales->size(); i++) { - scales.push_back(scaleData[i]); - } - - int mask = 0; - int oc_dim_id = 1; - mask = 1 << oc_dim_id; - - attr.set_output_scales(mask, scales); - } - auto createReorder = [&]() -> bool { // No autoblocking. 
Reorder can be applied as is reorder::primitive_desc pd = mkldnn::reorder::primitive_desc(src_blocked->GetPrimitive(), dst_blocked->GetPrimitive(), attr, true); @@ -159,13 +136,13 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDe // MKLDNN doesn't support direct reorders from planar data formats to grouped weights formats. // Code block below tries to detect such cases and reinterpret data planar formats (e.g. nchw) // as grouped weights planar formats (e.g. goihw) since they have same physical memory layout. - if (src_blocked->GetDesc().isPlainFormat() && + if (src_blocked->GetDesc().hasLayoutType(LayoutType::ncsp) && src_blocked->GetDims().size() + 1 == dst_blocked->GetDims().size()) { const auto newDims = dst_blocked->GetDims(); - const auto newFormat = MKLDNNMemory::GetPlainFormat(newDims); + const auto newFormat = MKLDNNMemory::GetPlainFormatByRank(newDims.size()); auto newDesc = mkldnn::memory::desc(newDims, src_blocked->GetDataType(), newFormat); - src_blocked->Create(newDesc, srcPtr, false); + src_blocked->Create(MKLDNNMemoryDesc(newDesc), srcPtr, false); success = createReorder(); } @@ -192,12 +169,14 @@ bool MKLDNNReorderNode::created() const { void MKLDNNReorderNode::optimizedNcsp2Nspc() { auto parentEdge = getParentEdgeAt(0); auto childEdge = getChildEdgeAt(0); - const int ndims = parentEdge->getDims().ndims(); - const size_t DIM0 = parentEdge->getDims()[0]; - const size_t DIM1 = parentEdge->getDims()[1]; - const size_t DIM2 = ndims == 5 ? parentEdge->getDims()[ndims - 3] : 1; - const size_t DIM3 = parentEdge->getDims()[ndims - 2]; - const size_t DIM4 = parentEdge->getDims()[ndims - 1]; + + auto inDims = parentEdge->getShape().getStaticDims(); + const size_t ndims = inDims.size(); + const size_t DIM0 = inDims[0]; + const size_t DIM1 = inDims[1]; + const size_t DIM2 = ndims == 5 ? inDims[ndims - 3] : 1; + const size_t DIM3 = inDims[ndims - 2]; + const size_t DIM4 = inDims[ndims - 1]; auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->GetPtr()); auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->GetPtr()); @@ -221,12 +200,14 @@ void MKLDNNReorderNode::optimizedNcsp2Nspc() { void MKLDNNReorderNode::optimizedNspc2Ncsp() { auto parentEdge = getParentEdgeAt(0); auto childEdge = getChildEdgeAt(0); - const int ndims = parentEdge->getDims().ndims(); - const size_t DIM0 = parentEdge->getDims()[0]; - const size_t DIM1 = parentEdge->getDims()[1]; - const size_t DIM2 = ndims == 5 ? parentEdge->getDims()[ndims - 3] : 1; - const size_t DIM3 = parentEdge->getDims()[ndims - 2]; - const size_t DIM4 = parentEdge->getDims()[ndims - 1]; + + auto inDims = parentEdge->getShape().getStaticDims(); + const size_t ndims = inDims.size(); + const size_t DIM0 = inDims[0]; + const size_t DIM1 = inDims[1]; + const size_t DIM2 = ndims == 5 ? inDims[ndims - 3] : 1; + const size_t DIM3 = inDims[ndims - 2]; + const size_t DIM4 = inDims[ndims - 1]; auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->GetPtr()); auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->GetPtr()); @@ -279,4 +260,20 @@ void MKLDNNReorderNode::setDynamicBatchLim(int lim) { createReorderPrimitive(src_d, src_data_hdl, dst_d, dst_data_hdl); } } + +std::string MKLDNNReorderNode::getReorderArgs(const MemoryDesc &parentDesc, const MemoryDesc &childDesc) { + std::string inArgs, outArgs; + if (parentDesc.getPrecision() != childDesc.getPrecision()) { + inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name()); + outArgs += (outArgs.empty() ? 
"" : "_") + std::string(childDesc.getPrecision().name()); + } + auto formatSrc = parentDesc.serializeFormat(); + auto formatDst = childDesc.serializeFormat(); + if (formatSrc != formatDst || one_of(std::string("undef"), formatSrc, formatDst)) { + inArgs += (inArgs.empty() ? "" : "_") + formatSrc; + outArgs += (outArgs.empty() ? "" : "_") + formatDst; + } + return inArgs + "_" + outArgs; +} + REG_MKLDNN_PRIM_FOR(MKLDNNReorderNode, Reorder); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h index 729097453fb..da821878035 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace MKLDNNPlugin { @@ -24,9 +25,14 @@ public: bool created() const override; const std::vector& getPrimitivesPriority() override; - void setDescs(const InferenceEngine::TensorDesc& input, const InferenceEngine::TensorDesc& output) { - this->input = input; - this->output = output; + void setDescs(const MemoryDesc& input, const MemoryDesc& output) { + this->input = input.clone(); + inputShapes.clear(); + inputShapes.push_back(this->input->getShape()); + + this->output = output.clone(); + outputShapes.clear(); + outputShapes.push_back(this->output->getShape()); } void setOptimized(bool isOptimized) { @@ -39,17 +45,14 @@ public: return false; } - const InferenceEngine::TensorDesc& getInput() { return input; } - const InferenceEngine::TensorDesc& getOutput() { return output; } + const MemoryDesc& getInput() { return *input; } + const MemoryDesc& getOutput() { return *output; } - /** - * @brief A pointer to a scales blob - */ - InferenceEngine::Blob::Ptr _scales; + static std::string getReorderArgs(const MemoryDesc &parentDesc, const MemoryDesc &childDesc); private: - InferenceEngine::TensorDesc input; - InferenceEngine::TensorDesc output; + std::unique_ptr input; + std::unique_ptr output; MKLDNNMemoryPtr dst_blocked; MKLDNNMemoryPtr src_blocked; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp index 3db7470e92f..48e2eaf9992 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -48,8 +46,8 @@ void MKLDNNReorgYoloNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -57,10 +55,10 @@ void MKLDNNReorgYoloNode::execute(mkldnn::stream strm) { const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - int IW = (getParentEdgeAt(0)->getDesc().getDims().size() > 3) ? getParentEdgeAt(0)->getDims()[3] : 1; - int IH = (getParentEdgeAt(0)->getDesc().getDims().size() > 2) ? getParentEdgeAt(0)->getDims()[2] : 1; - int IC = (getParentEdgeAt(0)->getDesc().getDims().size() > 1) ? getParentEdgeAt(0)->getDims()[1] : 1; - int B = (getParentEdgeAt(0)->getDesc().getDims().size() > 0) ? 
getParentEdgeAt(0)->getDims()[0] : 1; + int IW = (getParentEdgeAt(0)->getShape().getRank() > 3) ? getParentEdgeAt(0)->getShape().getStaticDims()[3] : 1; + int IH = (getParentEdgeAt(0)->getShape().getRank() > 2) ? getParentEdgeAt(0)->getShape().getStaticDims()[2] : 1; + int IC = (getParentEdgeAt(0)->getShape().getRank() > 1) ? getParentEdgeAt(0)->getShape().getStaticDims()[1] : 1; + int B = (getParentEdgeAt(0)->getShape().getRank() > 0) ? getParentEdgeAt(0)->getShape().getStaticDims()[0] : 1; int ic_off = IC / (stride * stride); int ih_off = IH * stride; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp index 543e0a86bcb..81175dcaf41 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp @@ -35,18 +35,18 @@ void MKLDNNReshapeNode::initSupportedPrimitiveDescriptors() { if (inputDataType != outputDataType) inputDataType = outputDataType; - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(getParentEdges().size()); for (size_t i = 0; i getDims(), inputDataType); + config.inConfs[i].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(i)->getShape().getStaticDims(), inputDataType); } config.outConfs.resize(1); config.outConfs[0].inPlace = 0; config.outConfs[0].constant = false; - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp index 5f6e6083e90..ffa831a670d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base.hpp" - #include #include @@ -85,9 +83,9 @@ void MKLDNNReverseSequenceNode::initSupportedPrimitiveDescriptors() { if (lengthsPrecision != Precision::I32 && lengthsPrecision != Precision::FP32) lengthsPrecision = Precision::I32; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, lengthsPrecision}}, - {{TensorDescCreatorTypes::ncsp, Precision::FP32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, lengthsPrecision}}, + {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); } @@ -96,7 +94,7 @@ void MKLDNNReverseSequenceNode::execute(mkldnn::stream strm) { const float *src_data = reinterpret_cast(getParentEdgeAt(REVERSESEQUENCE_DATA)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - switch (getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getDesc().getPrecision()) { + switch (getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().GetDesc().getPrecision()) { case Precision::FP32: { float *seq_lengths_data = reinterpret_cast(getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemoryPtr()->GetPtr()); for (i = 0; i < src_dims[batch_axis]; i++) { @@ -171,7 +169,7 @@ void MKLDNNReverseSequenceNode::execute(mkldnn::stream strm) { break; default: IE_THROW() << "ReverseSequence layer does not support " - << 
getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getDesc().getPrecision() << " precision"; + << getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().GetDesc().getPrecision() << " precision"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp index a85544e9e96..91201da8592 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp @@ -260,19 +260,19 @@ void MKLDNNRNN::initCell(const std::shared_ptr& op) { Gb = (cell_type != mkldnn::algorithm::lbr_gru) ? G : G + 1; // Expected shapes - MKLDNNDims D_shape {N, DC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; + std::vector D_shape {N, DC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; - if (in_data_dims != D_shape.ToSizeVector() - || in_h_state_dims != S_shape.ToSizeVector() - || out_h_state_dims != S_shape.ToSizeVector()) + if (in_data_dims != D_shape + || in_h_state_dims != S_shape + || out_h_state_dims != S_shape) IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); if (S == 2) { auto in_c_state_dims = op->get_input_shape(2); auto out_c_state_dims = op->get_output_shape(1); - if (in_c_state_dims != S_shape.ToSizeVector() - || out_c_state_dims != S_shape.ToSizeVector()) + if (in_c_state_dims != S_shape + || out_c_state_dims != S_shape) IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); } } @@ -281,52 +281,57 @@ void MKLDNNRNN::fillCellDesc() { runtimePrecision = getOriginalInputPrecisionAtPort(0); auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); - MKLDNNDims S_4D_shape {L, D, N, SC}; + std::vector S_4D_shape {L, D, N, SC}; // layer input plus states - in_data_d.resize(S + 1); - out_data_d.resize(S + 1); + in_data_d.reserve(S + 1); + out_data_d.reserve(S + 1); // Shapes and Attributes are correct. Can start internal stuff initialization. 
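// Minimal standalone sketch of the container change applied in the RNN hunks nearby:
// the descriptor vectors switch from resize() + indexed assignment to reserve() +
// emplace_back(), so elements are constructed in place instead of being
// default-constructed and then overwritten. The Desc type and the dims/tags below are
// hypothetical stand-ins, not the plugin's MKLDNNMemoryDesc or its real shapes.
#include <cstddef>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

struct Desc {
    std::vector<std::int64_t> dims;
    std::string tag;
    Desc(std::vector<std::int64_t> d, std::string t) : dims(std::move(d)), tag(std::move(t)) {}
};

int main() {
    const std::size_t numStates = 2;                  // e.g. hidden + cell state
    std::vector<Desc> in_d;
    in_d.reserve(numStates + 1);                      // layer input plus states
    in_d.emplace_back(std::vector<std::int64_t>{10, 1, 16}, "tnc");    // layer data
    in_d.emplace_back(std::vector<std::int64_t>{1, 1, 1, 32}, "ldnc"); // hidden state
    in_d.emplace_back(std::vector<std::int64_t>{1, 1, 1, 32}, "ldnc"); // cell state
    return in_d.size() == numStates + 1 ? 0 : 1;
}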
- in_data_d[RNNInOutKind::Layer] = {MKLDNNDims{T, N, DC}, dataType, memory::format_tag::tnc}; - out_data_d[RNNInOutKind::Layer] = {MKLDNNDims{T, N, SC}, dataType, memory::format_tag::tnc}; + in_data_d.emplace_back(std::vector{T, N, DC}, dataType, memory::format_tag::tnc); + out_data_d.emplace_back(std::vector{T, N, SC}, dataType, memory::format_tag::tnc); - in_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc}; + in_data_d.emplace_back(S_4D_shape, dataType, memory::format_tag::ldnc); + out_data_d.emplace_back(S_4D_shape, dataType, memory::format_tag::ldnc); if (haveCellState(cell_type)) { - in_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; + in_data_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc); + out_data_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc); } - w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo}; - w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo}; + w_data_d = MKLDNNPlugin::make_unique(std::vector{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo); + w_state_d = MKLDNNPlugin::make_unique(std::vector{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo); // Add 5th input - w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; + w_bias_d = MKLDNNPlugin::make_unique(std::vector{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo); copyWeightsData(); // Expected shapes - MKLDNNDims D_shape {N, DC}, S_shape {N, SC}, WShape {SC * G, DC}, RShape {SC * G, SC}, BShape {SC * Gb}; - std::vector in_candidate, out_candidate; + std::vector D_shape {N, DC}, S_shape {N, SC}, WShape {SC * G, DC}, RShape {SC * G, SC}, BShape {SC * Gb}; + std::vector in_candidate, out_candidate; in_candidate.reserve(6); - in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, dataType, memory::format_tag::nc}); - in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc}); - out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc}); + in_candidate.emplace_back(D_shape, dataType, memory::format_tag::nc); + in_candidate.emplace_back(S_shape, dataType, memory::format_tag::nc); + out_candidate.emplace_back(S_shape, dataType, memory::format_tag::nc); if (haveCellState(cell_type)) { - in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc}); - out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc}); + in_candidate.emplace_back(S_shape, memory::data_type::f32, memory::format_tag::nc); + out_candidate.emplace_back(S_shape, memory::data_type::f32, memory::format_tag::nc); } if (one_of(cell_type, mkldnn::algorithm::vanilla_rnn, mkldnn::algorithm::vanilla_gru, mkldnn::algorithm::lbr_gru, mkldnn::algorithm::vanilla_lstm)) { - in_candidate.emplace_back(MKLDNNMemoryDesc {WShape, memory::data_type::f32, memory::format_tag::nc}); - in_candidate.emplace_back(MKLDNNMemoryDesc {RShape, memory::data_type::f32, memory::format_tag::nc}); - in_candidate.emplace_back(MKLDNNMemoryDesc {BShape, memory::data_type::f32, memory::format_tag::x}); + in_candidate.emplace_back(WShape, memory::data_type::f32, memory::format_tag::nc); + in_candidate.emplace_back(RShape, 
memory::data_type::f32, memory::format_tag::nc); + in_candidate.emplace_back(BShape, memory::data_type::f32, memory::format_tag::x); } - createDescriptor(in_candidate, out_candidate); + std::vector in_candidate_ptrs(in_candidate.size()); + std::vector out_candidate_ptrs(out_candidate.size()); + std::transform(in_candidate.begin(), in_candidate.end(), in_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + std::transform(out_candidate.begin(), out_candidate.end(), out_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + + createDescriptor(in_candidate_ptrs, out_candidate_ptrs); } void MKLDNNRNN::initSeq(const std::shared_ptr& op) { @@ -373,64 +378,71 @@ void MKLDNNRNN::initSeq(const std::shared_ptr& op) { Gb = (cell_type != mkldnn::algorithm::lbr_gru) ? G : G + 1; // layer input plus states - in_data_d.resize(S + 1); - out_data_d.resize(S + 1); + in_data_d.reserve(S + 1); + out_data_d.reserve(S + 1); } void MKLDNNRNN::fillSeqDesc() { runtimePrecision = getOriginalInputPrecisionAtPort(0); auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); - MKLDNNDims S_4D_shape {L, D, N, SC}; + std::vector S_4D_shape {L, D, N, SC}; // Try to create descriptor and corresponding configuration - in_data_d[RNNInOutKind::Layer] = {MKLDNNDims{in_data_dims}, dataType, memory::format_tag::tnc}; - out_data_d[RNNInOutKind::Layer] = {MKLDNNDims{out_data_dims}, dataType, memory::format_tag::tnc}; + in_data_d.emplace_back(std::vector{in_data_dims}, dataType, memory::format_tag::tnc); + out_data_d.emplace_back(std::vector{out_data_dims}, dataType, memory::format_tag::tnc); - in_data_d[RNNInOutKind::HiddenState] = {MKLDNNDims{S_4D_shape}, dataType, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::HiddenState] = {MKLDNNDims{S_4D_shape}, dataType, memory::format_tag::ldnc}; + in_data_d.emplace_back(std::vector{S_4D_shape}, dataType, memory::format_tag::ldnc); + out_data_d.emplace_back(std::vector{S_4D_shape}, dataType, memory::format_tag::ldnc); if (haveCellState(cell_type)) { - in_data_d[RNNInOutKind::CellState] = {MKLDNNDims{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc}; - out_data_d[RNNInOutKind::CellState] = {MKLDNNDims{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc}; + in_data_d.emplace_back(std::vector{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc); + out_data_d.emplace_back(std::vector{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc); } - w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo}; - w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo}; + w_data_d = MKLDNNPlugin::make_unique(std::vector{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo); + w_state_d = MKLDNNPlugin::make_unique(std::vector{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo); - w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; + w_bias_d = MKLDNNPlugin::make_unique(std::vector{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo); copyWeightsData(); - std::vector in_candidate; + std::vector in_candidate; + in_candidate.reserve(7); if (nativeOrder) - in_candidate.push_back(MKLDNNMemoryDesc{inDims[RNNInOutKind::Layer], dataType, memory::format_tag::tnc}); + in_candidate.emplace_back(inputShapes[RNNInOutKind::Layer].getStaticDims(), dataType, memory::format_tag::tnc); else - in_candidate.push_back(MKLDNNMemoryDesc{{N, T, DC}, dataType, memory::format_tag::ntc}); + in_candidate.emplace_back(std::vector{N, T, DC}, dataType, 
memory::format_tag::ntc); - in_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, dataType, memory::format_tag::ntc}); // initial hidden state + in_candidate.emplace_back(std::vector{N, D, SC}, dataType, memory::format_tag::ntc); // initial hidden state if (haveCellState(cell_type)) - in_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc}); // initial cell state - in_candidate.push_back(MKLDNNMemoryDesc{{N}, memory::data_type::s32, memory::format_tag::x}); // sequence lengths - in_candidate.push_back(MKLDNNMemoryDesc{{D, G * SC, DC}, memory::data_type::f32, memory::format_tag::ntc}); // W - in_candidate.push_back(MKLDNNMemoryDesc{{D, G * SC, SC}, memory::data_type::f32, memory::format_tag::ntc}); // R - in_candidate.push_back(MKLDNNMemoryDesc{{D, Gb * SC}, memory::data_type::f32, memory::format_tag::nc}); // B + in_candidate.emplace_back(std::vector{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc); // initial cell state + in_candidate.emplace_back(std::vector{N}, memory::data_type::s32, memory::format_tag::x); // sequence lengths + in_candidate.emplace_back(std::vector{D, G * SC, DC}, memory::data_type::f32, memory::format_tag::ntc); // W + in_candidate.emplace_back(std::vector{D, G * SC, SC}, memory::data_type::f32, memory::format_tag::ntc); // R + in_candidate.emplace_back(std::vector{D, Gb * SC}, memory::data_type::f32, memory::format_tag::nc); // B - std::vector out_candidate; + std::vector out_candidate; + out_candidate.reserve(3); if (nativeOrder) { - out_candidate.push_back(out_data_d[RNNInOutKind::Layer]); + out_candidate.emplace_back(out_data_d[RNNInOutKind::Layer]); } else { // TODO reorder ntc -> ndtc does not work, thus use tnc(plain) + transformation reshape-transpose-reshape for now. 
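// Minimal standalone sketch of the pointer-conversion pattern used just below, where
// createDescriptor() now takes vectors of raw const pointers instead of descriptor
// objects: the candidates stay owned by the local *_candidate vectors and std::transform
// fills non-owning pointer vectors for the call. The Desc type and the free
// createDescriptor() function are hypothetical placeholders for the plugin's
// MKLDNNMemoryDesc and member function.
#include <algorithm>
#include <iostream>
#include <vector>

struct Desc { int rank; };

static void createDescriptor(const std::vector<const Desc*>& in,
                             const std::vector<const Desc*>& out) {
    std::cout << "in=" << in.size() << " out=" << out.size() << '\n';
}

int main() {
    std::vector<Desc> in_candidate  = {{3}, {3}, {1}};
    std::vector<Desc> out_candidate = {{3}, {3}};

    // Keep ownership in the candidate vectors; pass non-owning pointers to the interface.
    std::vector<const Desc*> in_ptrs(in_candidate.size());
    std::vector<const Desc*> out_ptrs(out_candidate.size());
    std::transform(in_candidate.begin(), in_candidate.end(), in_ptrs.begin(),
                   [](const Desc& item) { return &item; });
    std::transform(out_candidate.begin(), out_candidate.end(), out_ptrs.begin(),
                   [](const Desc& item) { return &item; });

    createDescriptor(in_ptrs, out_ptrs);
    return 0;
}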
- out_candidate.push_back(MKLDNNMemoryDesc{{T, N, SC}, dataType, memory::format_tag::tnc}); + out_candidate.emplace_back(std::vector{T, N, SC}, dataType, memory::format_tag::tnc); } - out_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, dataType, memory::format_tag::ntc}); + out_candidate.emplace_back(std::vector{N, D, SC}, dataType, memory::format_tag::ntc); if (haveCellState(cell_type)) - out_candidate.push_back(MKLDNNMemoryDesc{{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc}); + out_candidate.emplace_back(std::vector{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc); - createDescriptor(in_candidate, out_candidate); + std::vector in_candidate_ptrs(in_candidate.size()); + std::vector out_candidate_ptrs(out_candidate.size()); + std::transform(in_candidate.begin(), in_candidate.end(), in_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + std::transform(out_candidate.begin(), out_candidate.end(), out_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + + createDescriptor(in_candidate_ptrs, out_candidate_ptrs); } bool MKLDNNRNN::verifyWeightsPrecision(const Precision &layerPrec, const Precision &weightsPrec) { @@ -447,14 +459,14 @@ void MKLDNNRNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t } // create weight blobs (data and state part) auto w_data_mem = std::make_shared(getEngine()); - w_data_mem->Create(w_data_d); + w_data_mem->Create(*w_data_d); internalBlobMemory.push_back(w_data_mem); auto w_state_mem = std::make_shared(getEngine()); - w_state_mem->Create(w_state_d); + w_state_mem->Create(*w_state_d); internalBlobMemory.push_back(w_state_mem); - const size_t ie_w_vec_size = getParentEdgesAtPort(wIdx)[0]->getDims().size(); - const size_t ie_r_vec_size = getParentEdgesAtPort(rIdx)[0]->getDims().size(); + const size_t ie_w_vec_size = getParentEdgesAtPort(wIdx)[0]->getShape().getElementsCount(); + const size_t ie_r_vec_size = getParentEdgesAtPort(rIdx)[0]->getShape().getElementsCount(); auto *wInputNode = dynamic_cast(getParentEdgesAtPort(wIdx)[0]->getParent().get()); auto wConstBlob = wInputNode->getMemoryPtr(); @@ -504,7 +516,7 @@ void MKLDNNRNN::fillBiases(const int *gate_map) { } auto w_bias_mem = std::make_shared(getEngine()); - w_bias_mem->Create(w_bias_d); + w_bias_mem->Create(*w_bias_d); internalBlobMemory.push_back(w_bias_mem); auto *constInputNode = dynamic_cast(getParentEdgesAtPort(bIdx)[0]->getParent().get()); @@ -590,18 +602,17 @@ void MKLDNNRNN::copyWeightsData() { if (runtimePrecision == Precision::BF16 || runtimePrecision == Precision::FP32) fillBiases(gate_map); } - -void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { +void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { switch (cell_type) { case mkldnn::algorithm::vanilla_rnn: { MKLDNNDescriptor desc(std::shared_ptr( new vanilla_rnn_forward::desc(prop_kind::forward_scoring, cell_act, direction, /* In Data */ in_data_d[RNNInOutKind::Layer], /* In State */ in_data_d[RNNInOutKind::HiddenState], - /* Weights data */ w_data_d, - /* Weights state */ w_state_d, - /* Bias */ w_bias_d, + /* Weights data */ *w_data_d, + /* Weights state */ *w_state_d, + /* Bias */ *w_bias_d, /* Out Data */ out_data_d[RNNInOutKind::Layer], /* Out State */ out_data_d[RNNInOutKind::HiddenState]))); descs.push_back(desc); @@ -611,9 +622,9 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, new gru_forward::desc(prop_kind::forward_scoring, direction, /* In 
Data */ in_data_d[RNNInOutKind::Layer], /* In State */ in_data_d[RNNInOutKind::HiddenState], - /* Weights data */ w_data_d, - /* Weights state */ w_state_d, - /* Bias */ w_bias_d, + /* Weights data */ *w_data_d, + /* Weights state */ *w_state_d, + /* Bias */ *w_bias_d, /* Out Data */ out_data_d[RNNInOutKind::Layer], /* Out State */ out_data_d[RNNInOutKind::HiddenState]))); descs.push_back(desc); @@ -623,9 +634,9 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, new lbr_gru_forward::desc(prop_kind::forward_scoring, direction, /* In Data */ in_data_d[RNNInOutKind::Layer], /* In State */ in_data_d[RNNInOutKind::HiddenState], - /* Weights data */ w_data_d, - /* Weights state */ w_state_d, - /* Bias */ w_bias_d, + /* Weights data */ *w_data_d, + /* Weights state */ *w_state_d, + /* Bias */ *w_bias_d, /* Out Data */ out_data_d[RNNInOutKind::Layer], /* Out State */ out_data_d[RNNInOutKind::HiddenState]))); descs.push_back(desc); @@ -636,9 +647,9 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, /* In Data */ in_data_d[RNNInOutKind::Layer], /* In State */ in_data_d[RNNInOutKind::HiddenState], /* In State C */ in_data_d[RNNInOutKind::CellState], - /* Weights data */ w_data_d, - /* Weights state */ w_state_d, - /* Bias */ w_bias_d, + /* Weights data */ *w_data_d, + /* Weights state */ *w_state_d, + /* Bias */ *w_bias_d, /* Out Data */ out_data_d[RNNInOutKind::Layer], /* Out State */ out_data_d[RNNInOutKind::HiddenState], /* Out State C */ out_data_d[RNNInOutKind::CellState]))); @@ -649,21 +660,21 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, } // Fill supported config - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; for (size_t i = 0; i < inputDesc.size(); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = inputDesc[i]; + dataConfig.desc = inputDesc[i]->clone(); config.inConfs.push_back(dataConfig); } for (size_t i = 0; i < outputDesc.size(); i++) { - InferenceEngine::DataConfig dataConfig; + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = outputDesc[i]; + dataConfig.desc = outputDesc[i]->clone(); config.outConfs.push_back(dataConfig); } @@ -705,9 +716,9 @@ void MKLDNNRNN::execute(mkldnn::stream strm) { args[state_o_tags[s]] = getChildEdgesAtPort(s)[0]->getMemoryPtr()->GetPrimitive(); } } else { - ptrdiff_t n_ports_with_init_states = outDims.size() - 1; // first is a sequence data + size_t n_ports_with_init_states = outputShapes.size() - 1; // first is a sequence data for (size_t s = 0; s < std::min(S, n_ports_with_init_states); s++) { - if (s < inDims.size()) { + if (s < outputShapes.size()) { args[state_o_tags[s]] = getChildEdgesAtPort(s+1)[0]->getMemoryPtr()->GetPrimitive(); } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h index 7b42760a425..0a2bd93d3d9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h @@ -19,8 +19,8 @@ public: void getSupportedDescriptors() override; void createPrimitive() override; bool created() const override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void execute(mkldnn::stream strm) override; @@ -40,6 +40,8 @@ private: void 
copyWeightsData(); private: + using MKLDNNMemoryDescPtr = std::unique_ptr; + InferenceEngine::Precision runtimePrecision; /** Specify mode Cell or Seq. true - Cell, false - Seq */ bool is_cell = false; @@ -57,15 +59,15 @@ private: mkldnn::algorithm cell_act = mkldnn::algorithm::eltwise_tanh; // Internal attributes - ptrdiff_t N = 0; /**< Batch value */ - ptrdiff_t T = 0; /**< Sequence value */ - ptrdiff_t DC = 0; /**< Input data channel size */ - ptrdiff_t SC = 0; /**< State channel size value */ - ptrdiff_t G = 0; /**< Gate size. LSTM - 4, GRU - 3, RNN - 1 */ - ptrdiff_t Gb = 0; /**< Gate size for biases. Gb = GRU_lbr ? G+1 : G */ - ptrdiff_t S = 2; /**< Num of state. LSTM - 2, GRU & RNN - 1 */ - const ptrdiff_t L = 1; /**< What is it??. Constant for mkldnn impl */ - const ptrdiff_t D = 1; /**< Num of direction. 1 or 2 */ + size_t N = 0; /**< Batch value */ + size_t T = 0; /**< Sequence value */ + size_t DC = 0; /**< Input data channel size */ + size_t SC = 0; /**< State channel size value */ + size_t G = 0; /**< Gate size. LSTM - 4, GRU - 3, RNN - 1 */ + size_t Gb = 0; /**< Gate size for biases. Gb = GRU_lbr ? G+1 : G */ + size_t S = 2; /**< Num of state. LSTM - 2, GRU & RNN - 1 */ + const size_t L = 1; /**< What is it??. Constant for mkldnn impl */ + const size_t D = 1; /**< Num of direction. 1 or 2 */ std::vector in_data_d; std::vector out_data_d; @@ -76,9 +78,9 @@ private: CellState = 2 }; - MKLDNNMemoryDesc w_data_d; - MKLDNNMemoryDesc w_state_d; - MKLDNNMemoryDesc w_bias_d; + MKLDNNMemoryDescPtr w_data_d; + MKLDNNMemoryDescPtr w_state_d; + MKLDNNMemoryDescPtr w_bias_d; std::vector in_data_dims; std::vector out_data_dims; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp index 1aa7752f456..0517350e09c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp @@ -73,31 +73,31 @@ void MKLDNNROIAlignNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size(); - if (getParentEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); + if (getParentEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); } - if (getParentEdgeAt(1)->getDims().ndims() != 2) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); + if (getParentEdgeAt(1)->getShape().getRank() != 2) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); } - if (getParentEdgeAt(2)->getDims().ndims() != 1) { - IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getDims().ndims(); + if (getParentEdgeAt(2)->getShape().getRank() != 1) { + IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getShape().getRank(); } - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); + if (getChildEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); } - if 
(getParentEdgeAt(1)->getDims()[1] != 4) { + if (getParentEdgeAt(1)->getShape().getStaticDims()[1] != 4) { IE_THROW() << errorPrefix << "has invalid shape on 1st input: [" - << getParentEdgeAt(1)->getDims()[0] << "," << getParentEdgeAt(1)->getDims()[1] << "]"; + << getParentEdgeAt(1)->getShape().getStaticDims()[0] << "," << getParentEdgeAt(1)->getShape().getStaticDims()[1] << "]"; } - if (getParentEdgeAt(1)->getDims()[0] != getParentEdgeAt(2)->getDims()[0]) { + if (getParentEdgeAt(1)->getShape().getStaticDims()[0] != getParentEdgeAt(2)->getShape().getStaticDims()[0]) { IE_THROW() << errorPrefix << "has different sizes of inputs for proposals (" - << getParentEdgeAt(1)->getDims()[0] << ") and indexes (" - << getParentEdgeAt(2)->getDims()[0] << ")"; + << getParentEdgeAt(1)->getShape().getStaticDims()[0] << ") and indexes (" + << getParentEdgeAt(2)->getShape().getStaticDims()[0] << ")"; } } @@ -116,7 +116,7 @@ void MKLDNNROIAlignNode::initSupportedPrimitiveDescriptors() { auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrec0); auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrec); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(3); config.outConfs.resize(1); @@ -129,11 +129,13 @@ void MKLDNNROIAlignNode::initSupportedPrimitiveDescriptors() { }; for (auto fmts : supportedFormats) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, fmts.first); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), memory::data_type::f32, memory::format_tag::nc); - config.inConfs[2].desc = MKLDNNMemoryDesc(getParentEdgeAt(2)->getDims(), memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmts.second); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, fmts.second}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, fmts.first); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::f32, + memory::format_tag::nc); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::s32, + memory::format_tag::x); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, fmts.second); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } } @@ -178,8 +180,8 @@ void MKLDNNROIAlignNode::executeSpecified() { auto dstBlockDesc = dstMemory.GetDescriptor().data.format_desc.blocking; int blockSize = srcBlockDesc.inner_nblks > 0 ? 
srcBlockDesc.inner_blks[0] : 1; - auto isPlainFmt = srcMemory0.GetDesc().isPlainFormat(); - auto isNhwcFmt = srcMemory0.GetDesc().isTailCFormat(); + auto isPlainFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::ncsp); + auto isNhwcFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::nspc); const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); const auto *srcRoi = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp index a1a7f8329a5..23fd252ae2b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp @@ -354,21 +354,21 @@ void MKLDNNROIPoolingNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size(); - if (getParentEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); + if (getParentEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); } - if (getParentEdgeAt(1)->getDims().ndims() != 2) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); + if (getParentEdgeAt(1)->getShape().getRank() != 2) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); } - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); + if (getChildEdgeAt(0)->getShape().getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); } - if (getParentEdgeAt(1)->getDims()[1] != 5) { + if (getParentEdgeAt(1)->getShape().getStaticDims()[1] != 5) { IE_THROW() << errorPrefix << "has invalid shape on 1st input: [" - << getParentEdgeAt(1)->getDims()[0] << "," << getParentEdgeAt(1)->getDims()[1] << "]"; + << getParentEdgeAt(1)->getShape().getStaticDims()[0] << "," << getParentEdgeAt(1)->getShape().getStaticDims()[1] << "]"; } } @@ -388,7 +388,7 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { src_data_size = MKLDNNExtensionUtils::sizeOfDataType(dataType); dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(dataType); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(2); config.inConfs[0].constant = false; @@ -400,7 +400,7 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { config.outConfs[0].constant = false; config.outConfs[0].inPlace = -1; - auto parentDims = getParentEdgeAt(0)->getDims(); + auto parentDims = getParentEdgeAt(0)->getShape().getStaticDims(); auto format = mayiuse(avx512_common) ? 
memory::format_tag::nChw16c : memory::format_tag::nChw8c; impl_desc_type impl_type; if (mayiuse(cpu::x64::avx512_common)) { @@ -413,10 +413,10 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), dataType, format); - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), dataType, memory::format_tag::nc); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), dataType, format); - supportedPrimitiveDescriptors.push_back({config, impl_type, format}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), dataType, format); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), dataType, memory::format_tag::nc); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), dataType, format); + supportedPrimitiveDescriptors.push_back({config, impl_type}); } void MKLDNNROIPoolingNode::createPrimitive() { @@ -428,8 +428,8 @@ void MKLDNNROIPoolingNode::createPrimitive() { const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8; jpp.c_block = simd_w; - auto inDims = config.inConfs[0].desc.getDims(); - auto outDims = config.outConfs[0].desc.getDims(); + auto inDims = config.inConfs[0].desc->getShape().getStaticDims(); + auto outDims = config.outConfs[0].desc->getShape().getStaticDims(); jpp.mb = outDims[0]; jpp.c = rnd_up(inDims[1], simd_w); @@ -447,8 +447,8 @@ void MKLDNNROIPoolingNode::createPrimitive() { jpp.nb_c_blocking = mayiuse(cpu::x64::avx512_common) ? 15 : 7; auto selectedPD = getSelectedPrimitiveDescriptor(); - jpp.src_prc = selectedPD->getConfig().inConfs[0].desc.getPrecision(); - jpp.dst_prc = selectedPD->getConfig().outConfs[0].desc.getPrecision(); + jpp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision(); + jpp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision(); jpp.src_data_size = jpp.src_prc.size(); jpp.dst_data_size = jpp.dst_prc.size(); @@ -481,9 +481,9 @@ void MKLDNNROIPoolingNode::execute() { IE_THROW() << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); - auto src_strides = config.inConfs[0].desc.getBlockingDesc().getStrides(); - auto dst_strides = config.outConfs[0].desc.getBlockingDesc().getStrides(); - size_t src_roi_step = config.inConfs[1].desc.getBlockingDesc().getStrides()[0]; + auto src_strides = srcMemory0.GetDescWithType().getStrides(); + auto dst_strides = dstMemory.GetDescWithType().getStrides(); + size_t src_roi_step = srcMemory1.GetDescWithType().getStrides()[0]; int cb_work = impl::utils::div_up(jpp.nb_c, jpp.nb_c_blocking); int MB = jpp.mb; @@ -512,13 +512,18 @@ void MKLDNNROIPoolingNode::execute() { if (roi_pooling_kernel) { arg.bin_area = 0; arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]]; + (*roi_pooling_kernel)(&arg); } else { - for (int c = 0; c < c_block; c++) { - dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + } } } 
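// Minimal standalone sketch of the channel-block loop the ROI pooling reference
// branches below now repeat in several places: each work item walks the channel
// blocks assigned to it (cbb * cb_num + cbb_cur) and stops early once the index runs
// past the real block count, since the last work item may own fewer than cb_num blocks.
// The names nb_c, cb_num and c_block mirror the fields used in the patch, but the
// buffer layout here is a simplified stand-in, not the node's real strides.
#include <cstddef>
#include <vector>

int main() {
    const std::size_t nb_c    = 10;  // total channel blocks
    const std::size_t cb_num  = 4;   // channel blocks handled per work item
    const std::size_t c_block = 8;   // channels per block
    std::vector<float> dst(nb_c * c_block, -1.0f);

    const std::size_t work_items = (nb_c + cb_num - 1) / cb_num;
    for (std::size_t cbb = 0; cbb < work_items; ++cbb) {
        for (std::size_t cbb_cur = 0; cbb_cur < cb_num; ++cbb_cur) {
            const std::size_t ch_blk_cur = cbb * cb_num + cbb_cur;
            if (ch_blk_cur >= nb_c)
                break;  // current work item owns fewer than cb_num blocks
            for (std::size_t c = 0; c < c_block; ++c)
                dst[ch_blk_cur * c_block + c] = 0.0f;  // e.g. zero-fill the output bin
        }
    }

    // Every element is visited exactly once, with no out-of-range block index.
    for (float v : dst)
        if (v != 0.0f) return 1;
    return 0;
}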
- - (*roi_pooling_kernel)(&arg); } else { size_t roi_off = n * src_roi_step; const auto *src_roi_ptr = &src_roi[roi_off]; @@ -568,18 +573,23 @@ void MKLDNNROIPoolingNode::execute() { arg.kh = hend - hstart; arg.kw = wend - wstart; } else { - for (int c = 0; c < c_block; c++) { - const size_t pool_index = n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c; - if ((hend <= hstart) || (wend <= wstart)) { - dst[pool_index] = 0; - } else { - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - float batch_data = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - h * src_strides[2] + w * src_strides[3] + c]; - - if (batch_data > dst[pool_index]) { - dst[pool_index] = batch_data; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + const size_t pool_index = n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c; + if ((hend <= hstart) || (wend <= wstart)) { + dst[pool_index] = 0; + } else { + dst[pool_index] = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + hstart * src_strides[2] + wstart * src_strides[3] + c]; + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + float batch_data = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + h * src_strides[2] + w * src_strides[3] + c]; + dst[pool_index] = std::fmax(batch_data, dst[pool_index]); } } } @@ -595,18 +605,35 @@ void MKLDNNROIPoolingNode::execute() { float height_scale = (jpp.pooled_h > 1 ? ((roi_end_h_ - roi_start_h_) * (jpp.ih - 1)) / (jpp.pooled_h - 1) : 0); float width_scale = (jpp.pooled_w > 1 ? ((roi_end_w_ - roi_start_w_) * (jpp.iw - 1)) / (jpp.pooled_w - 1) : 0); - float in_y = (jpp.pooled_h > 1 ? (oh * height_scale + roi_start_h_ * (jpp.ih - 1)) : - 0.5 * (roi_start_h_ + roi_end_h_) * (jpp.ih - 1)); - float in_x = (jpp.pooled_w > 1 ? (ow * width_scale + roi_start_w_ * (jpp.iw - 1)) : - 0.5 * (roi_start_w_ + roi_end_w_) * (jpp.iw - 1)); + float in_y, in_x; + // because of nonalgebraic character of floating point operation, some proposals can cause violation of inequality: + // ((end_h - start_h) * (input_h - 1) / (pooled_h - 1)) * (pooled_h - 1) <= (end_h - start_h) * (input_h - 1), + // and as result excess of right limit for proposal value, + // if the border case (current_h == pooled_h - 1) will not be handled explicitly + if (jpp.pooled_h > 1) { + in_y = (oh == jpp.pooled_h - 1 ? roi_end_h_ * (jpp.ih - 1) : (oh * height_scale + roi_start_h_ * (jpp.ih - 1))); + } else { + in_y = 0.5 * (roi_start_h_ + roi_end_h_) * (jpp.ih - 1); + } + if (jpp.pooled_w > 1) { + in_x = (ow == jpp.pooled_w - 1 ? 
roi_end_w_ * (jpp.iw - 1) : (ow * width_scale + roi_start_w_ * (jpp.iw - 1))); + } else { + in_x = 0.5 * (roi_start_w_ + roi_end_w_) * (jpp.iw - 1); + } if (in_y < 0 || in_y > jpp.ih - 1 || in_x < 0 || in_x > jpp.iw - 1) { if (roi_pooling_kernel) { arg.bin_area = 0; arg.dst = &dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3]]; } else { - for (int c = 0; c < c_block; c++) { - dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = 0; + } } } } else { @@ -635,21 +662,27 @@ void MKLDNNROIPoolingNode::execute() { arg.bin_area = 1; } else { - for (int c = 0; c < 1; c++) { - const float top_left = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - top_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; - const float top_right = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - top_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; - const float bottom_left = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - bottom_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; - const float bottom_right = src_data[roi_batch_ind * src_strides[0] + cb * src_strides[1] + - bottom_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; + for (int cbb_cur = 0; cbb_cur < cb_num; cbb_cur++) { + int ch_blk_cur = cbb * cb_num + cbb_cur; + if (ch_blk_cur >= jpp.nb_c) { + break; // current block work is done + } + for (int c = 0; c < c_block; c++) { + const float top_left = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + top_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; + const float top_right = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + top_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; + const float bottom_left = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + bottom_y_index * src_strides[2] + left_x_index * src_strides[3] + c]; + const float bottom_right = src_data[roi_batch_ind * src_strides[0] + ch_blk_cur * src_strides[1] + + bottom_y_index * src_strides[2] + right_x_index * src_strides[3] + c]; - const float top = top_left + (top_right - top_left) * (in_x - left_x_index); - const float bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index); + const float top = top_left + (top_right - top_left) * (in_x - left_x_index); + const float bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index); - dst[n * dst_strides[0] + cb * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = - top + (bottom - top) * (in_y - top_y_index); + dst[n * dst_strides[0] + ch_blk_cur * dst_strides[1] + oh * dst_strides[2] + ow * dst_strides[3] + c] = + top + (bottom - top) * (in_y - top_y_index); + } } } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp index 136ccba9c64..410051c7be4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp @@ -41,7 +41,7 @@ MKLDNNRollNode::MKLDNNRollNode(const std::shared_ptr& op, 
const mk IE_THROW() << layerErrorPrefix << " has incorrect number of input/output edges!"; } - shape = inDims[DATA_INDEX].ToSizeVector(); + shape = inputShapes[DATA_INDEX].getStaticDims(); const auto &dataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX); if (std::find(supportedPrecisionSizes.begin(), supportedPrecisionSizes.end(), dataPrecision.size()) == supportedPrecisionSizes.end()) @@ -52,7 +52,7 @@ MKLDNNRollNode::MKLDNNRollNode(const std::shared_ptr& op, const mk } numOfDims = shape.size(); - if (shape != outDims[0].ToSizeVector()) { + if (shape != outputShapes[0].getStaticDims()) { IE_THROW() << layerErrorPrefix << " has different 'data' input and output dimensions"; } @@ -62,7 +62,7 @@ MKLDNNRollNode::MKLDNNRollNode(const std::shared_ptr& op, const mk IE_THROW() << layerErrorPrefix << " has unsupported 'axes' input precision: " << axesTensorPrec.name(); } - const auto axesTensorRank = inDims[AXES_INDEX].ndims(); + const auto axesTensorRank = inputShapes[AXES_INDEX].getRank(); if (axesTensorRank > 1) { IE_THROW() << layerErrorPrefix << " doesn't support 'axes' input tensor with rank: " << axesTensorRank; } @@ -73,7 +73,7 @@ MKLDNNRollNode::MKLDNNRollNode(const std::shared_ptr& op, const mk IE_THROW() << layerErrorPrefix << " has unsupported 'shift' input precision: " << shiftTensorPrec.name(); } - const auto shiftTensorRank = inDims[SHIFT_INDEX].ndims(); + const auto shiftTensorRank = inputShapes[SHIFT_INDEX].getRank(); if (shiftTensorRank > 1) { IE_THROW() << layerErrorPrefix << " doesn't support 'shift' input tensor with rank: " << shiftTensorRank; } @@ -92,32 +92,31 @@ void MKLDNNRollNode::initSupportedPrimitiveDescriptors() { auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto srcDims = getParentEdgeAt(0)->getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); - auto dataMemoryFormat = MKLDNNMemory::GetPlainFormat(getParentEdgeAt(0)->getDims()); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; - auto createDataConfig = [](const MKLDNNDims& dims, memory::data_type dataType) -> InferenceEngine::DataConfig { - InferenceEngine::DataConfig dataConfig; + auto createDataConfig = [](const Shape& dims, memory::data_type dataType) -> PortConfig { + PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dims, dataType, MKLDNNMemory::GetPlainFormat(dims)); + dataConfig.desc = MKLDNNPlugin::make_unique(dims.getStaticDims(), dataType, MKLDNNMemory::GetPlainFormatByRank(dims.getRank())); return dataConfig; }; - config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getDims(), dataType)); - config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getDims(), memory::data_type::s32)); - config.inConfs.push_back(createDataConfig(getParentEdgeAt(2)->getDims(), memory::data_type::s32)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getShape(), dataType)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getShape(), memory::data_type::s32)); + config.inConfs.push_back(createDataConfig(getParentEdgeAt(2)->getShape(), memory::data_type::s32)); - config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getDims(), dataType)); + config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getShape(), dataType)); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, dataMemoryFormat}); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref}); } void 
MKLDNNRollNode::execute(mkldnn::stream strm) { - const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getDesc().getPrecision(); + const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getMemory().GetDesc().getPrecision(); const auto& dataTypeSize = dataPrecision.size(); switch (dataTypeSize) { case sizeof(PrecisionTrait::value_type): { @@ -156,7 +155,7 @@ void MKLDNNRollNode::rollImpl() { auto *output = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); std::vector shiftsVector(numOfDims, 0); - const size_t axesLength = axesEdge->getDims()[0]; + const size_t axesLength = axesEdge->getShape().getStaticDims()[0]; for (size_t dim = 0; dim < axesLength ; ++dim) { int32_t currentAxis = axes[dim] < 0 ? axes[dim] + numOfDims : axes[dim]; int32_t shiftSum = shiftsVector[currentAxis] + shifts[dim]; @@ -171,7 +170,7 @@ void MKLDNNRollNode::rollImpl() { const size_t elementSize = sizeof(DataType); const size_t nIterations = totalElements / blockSize; - const auto strides = dataEdge->getDesc().getBlockingDesc().getStrides(); + const auto strides = dataEdge->getMemory().GetDescWithType().getStrides(); parallel_for(nIterations, [&](size_t iter) { size_t start = iter * blockSize; size_t leftBlockStartOffset = start; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp index 5b9692fc562..af7b36dd7f3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp @@ -52,9 +52,9 @@ void MKLDNNScatterUpdateNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges"; - if (getParentEdgeAt(DATA_ID)->getDims().ndims() < 1 || - getParentEdgeAt(INDICES_ID)->getDims().ndims() < 1 || - getParentEdgeAt(UPDATE_ID)->getDims().ndims() < 1) { + if (getParentEdgeAt(DATA_ID)->getShape().getRank() < 1 || + getParentEdgeAt(INDICES_ID)->getShape().getRank() < 1 || + getParentEdgeAt(UPDATE_ID)->getShape().getRank() < 1) { IE_THROW() << errorPrefix << " do not support scalar input"; } @@ -77,15 +77,15 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto srcDataDim = getParentEdgeAt(DATA_ID)->getDims(); - auto indicesDim = getParentEdgeAt(INDICES_ID)->getDims(); - auto updateDim = getParentEdgeAt(UPDATE_ID)->getDims(); - auto dstDataDim = getChildEdgeAt(0)->getDims(); + auto srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + auto indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); + auto updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); + auto dstDataDim = getChildEdgeAt(0)->getShape().getStaticDims(); - size_t srcRank = srcDataDim.ndims(); - size_t indicesRank = indicesDim.ndims(); - size_t updateRank = updateDim.ndims(); - size_t dstRank = dstDataDim.ndims(); + size_t srcRank = srcDataDim.size(); + size_t indicesRank = indicesDim.size(); + size_t updateRank = updateDim.size(); + size_t dstRank = dstDataDim.size(); // common check if (srcRank != dstRank) { @@ -179,7 +179,7 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { bool canBeInplace = getParentEdgeAt(DATA_ID)->getParent()->getChildEdges().size() == 1 && !getParentEdgeAt(DATA_ID)->getParent()->isConstant(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; if (axisRelaxed) { 
config.inConfs.resize(4); @@ -201,20 +201,22 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { } auto pushDesc = [&](memory::format_tag inFormat, memory::format_tag idxFormat, memory::format_tag updateFormat, memory::format_tag outFormat) { - config.inConfs[DATA_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA_ID)->getDims(), dataType, inFormat); - config.inConfs[INDICES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(INDICES_ID)->getDims(), indicesType, idxFormat); - config.inConfs[UPDATE_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(UPDATE_ID)->getDims(), dataType, updateFormat); + config.inConfs[DATA_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, inFormat); + config.inConfs[INDICES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(), indicesType, + idxFormat); + config.inConfs[UPDATE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(), dataType, + updateFormat); if (axisRelaxed) - config.inConfs[AXIS_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(AXIS_ID)->getDims(), + config.inConfs[AXIS_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXIS_ID)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(axisPrec), memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), dataType, outFormat); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, outFormat}); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), dataType, outFormat); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); }; - pushDesc(MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(DATA_ID)->getDims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(INDICES_ID)->getDims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getParentEdgeAt(UPDATE_ID)->getDims())), - MKLDNNMemory::GetPlainFormat(memory::dims(getChildEdgeAt(0)->getDims()))); + pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(INDICES_ID)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(UPDATE_ID)->getShape().getRank()), + MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank())); } void MKLDNNScatterUpdateNode::createPrimitive() { @@ -272,8 +274,8 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { uint8_t *indicesPtr = reinterpret_cast(indicesMemPtr->GetPtr()); uint8_t *updatePtr = reinterpret_cast(updateMemPtr->GetPtr()); - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getDesc().getDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); size_t srcRank = srcDataDim.size(); int axis = 0; if (axisRelaxed) { @@ -309,8 +311,8 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { }); if (scatterUpdateMode == ScatterUpdateMode::ScatterUpdate) { - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); - SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getDesc().getDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); + SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); size_t indicesRank = indicesDim.size(); size_t 
updateRank = updateDim.size(); SizeVector expectUpdateShape = {}; @@ -370,9 +372,9 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { // and indices tensor of shape [i_0, i_1, ..., i_k]. // Updates tensor shape should be [d_0, d_1, ... d_(axis - 1), i_0, i_1, ..., i_k, d_(axis + 1), ..., d_n]. void MKLDNNScatterUpdateNode::scatterUpdate(uint8_t *indices, uint8_t *update, int axis, uint8_t *dstData) { - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getDesc().getDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); - SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getDesc().getDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); + SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); size_t indicesRank = indicesDim.size(); std::vector srcBlockND = getBlockND(srcDataDim); @@ -403,8 +405,8 @@ void MKLDNNScatterUpdateNode::scatterUpdate(uint8_t *indices, uint8_t *update, i // k is indices.shape[-1] and should not be greater than rank of input, q is rank of indicies. // updates is a (q-1)-dimension tensor of replacement-slice-values void MKLDNNScatterUpdateNode::scatterNDUpdate(uint8_t *indices, uint8_t *update, uint8_t *dstData) { - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getDesc().getDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); size_t indicesRank = indicesDim.size(); std::vector srcBlockND = getBlockND(srcDataDim); @@ -433,9 +435,9 @@ void MKLDNNScatterUpdateNode::scatterNDUpdate(uint8_t *indices, uint8_t *update, // output[i][indices[i][j][k]][k] = updates[i][j][k] if axis = 1, // output[i][j][indices[i][j][k]] = updates[i][j][k] if axis = 2. 
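A compact reference sketch of the ScatterElementsUpdate semantics stated in the comment above, for a rank-3 tensor. This is not the plugin implementation: the row-major indexing helper, the names, and the assumption of non-negative indices are mine.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Flat index for a plain row-major (ncsp-like) rank-3 tensor.
    static size_t flat(const std::vector<size_t>& d, size_t i, size_t j, size_t k) {
        return (i * d[1] + j) * d[2] + k;
    }

    void scatterElementsUpdateRef(std::vector<float>& data, const std::vector<size_t>& dataDims,
                                  const std::vector<int32_t>& indices, const std::vector<float>& updates,
                                  const std::vector<size_t>& updDims, int axis) {
        for (size_t i = 0; i < updDims[0]; ++i)
            for (size_t j = 0; j < updDims[1]; ++j)
                for (size_t k = 0; k < updDims[2]; ++k) {
                    const size_t u = flat(updDims, i, j, k);
                    size_t idx[3] = {i, j, k};
                    idx[axis] = static_cast<size_t>(indices[u]);  // replace the coordinate along 'axis'
                    data[flat(dataDims, idx[0], idx[1], idx[2])] = updates[u];
                }
    }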
void MKLDNNScatterUpdateNode::scatterElementsUpdate(uint8_t *indices, uint8_t *update, int axis, uint8_t *dstData) { - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getDesc().getDims(); - SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getDesc().getDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getDesc().getDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); size_t updateRank = updateDim.size(); std::vector srcBlockND = getBlockND(srcDataDim); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp index c67a4394ed8..093ee7e8255 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp @@ -7,7 +7,7 @@ #include #include "ie_parallel.hpp" #include "mkldnn_select_node.h" -#include +#include #include #include #include "common/cpu_memcpy.h" @@ -129,10 +129,10 @@ void MKLDNNSelectNode::initSupportedPrimitiveDescriptors() { if (inputPrecisionSize != 1 && inputPrecisionSize != 2 && inputPrecisionSize != 4 && inputPrecisionSize != 8) IE_THROW() << errorPrefix << " has unsupported precision: " << inputPrecision << " on 'Then' and 'Else' inputs"; - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, conditionPrecision}, - {TensorDescCreatorTypes::ncsp, inputPrecision}, - {TensorDescCreatorTypes::ncsp, inputPrecision}}, - {{TensorDescCreatorTypes::ncsp, inputPrecision}}, + addSupportedPrimDesc({{LayoutType::ncsp, conditionPrecision}, + {LayoutType::ncsp, inputPrecision}, + {LayoutType::ncsp, inputPrecision}}, + {{LayoutType::ncsp, inputPrecision}}, impl_desc_type::ref_any); } @@ -180,8 +180,8 @@ void MKLDNNSelectNode::execute_impl() { } void MKLDNNSelectNode::execute(mkldnn::stream strm) { - const size_t condPrecSize = getParentEdgeAt(CONDITION)->getDesc().getPrecision().size(); - const size_t inputsPrecSize = getParentEdgeAt(THEN)->getDesc().getPrecision().size(); + const size_t condPrecSize = getParentEdgeAt(CONDITION)->getMemory().GetDesc().getPrecision().size(); + const size_t inputsPrecSize = getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().size(); switch (condPrecSize) { case 1: { @@ -192,7 +192,7 @@ void MKLDNNSelectNode::execute(mkldnn::stream strm) { case 8: { execute_impl(); break; } default: IE_THROW() << "Select layer doesn't support 'Then' and 'Else' inputs' precision: " - + std::string(getParentEdgeAt(THEN)->getDesc().getPrecision().name()); + + std::string(getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().name()); } break; } @@ -204,13 +204,13 @@ void MKLDNNSelectNode::execute(mkldnn::stream strm) { case 8: { execute_impl(); break; } default: IE_THROW() << "Select layer doesn't support 'Then' and 'Else' inputs' precision: " - + std::string(getParentEdgeAt(THEN)->getDesc().getPrecision().name()); + + std::string(getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().name()); } break; } default: { IE_THROW() << "Select layer doesn't support 'Condition' inputs' precision: " - + std::string(getParentEdgeAt(CONDITION)->getDesc().getPrecision().name()); + + std::string(getParentEdgeAt(CONDITION)->getMemory().GetDesc().getPrecision().name()); } } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp index 95b00af386b..f83ddfed0d0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp @@ -7,7 +7,7 @@ #include #include #include -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include "common/cpu_memcpy.h" #include "utils/general_utils.h" @@ -95,8 +95,8 @@ void MKLDNNShuffleChannelsNode::initSupportedPrimitiveDescriptors() { } // use ncsp as default for non-quantized networks and nspc for quantized - auto firstCreatorType = isInQuantizedGraph ? TensorDescCreatorTypes::nspc : TensorDescCreatorTypes::ncsp; - auto secondCreatorType = isInQuantizedGraph ? TensorDescCreatorTypes::ncsp : TensorDescCreatorTypes::nspc; + auto firstCreatorType = isInQuantizedGraph ? LayoutType::nspc : LayoutType::ncsp; + auto secondCreatorType = isInQuantizedGraph ? LayoutType::ncsp : LayoutType::nspc; addSupportedPrimDesc({{firstCreatorType, precision}}, {{firstCreatorType, precision}}, @@ -106,11 +106,11 @@ void MKLDNNShuffleChannelsNode::initSupportedPrimitiveDescriptors() { impl_type, supportDynamicBatch_); // canUseBlocked if (axis_ != 1) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp8c, precision}}, - {{TensorDescCreatorTypes::nCsp8c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp8c, precision}}, + {{LayoutType::nCsp8c, precision}}, impl_type, supportDynamicBatch_); - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp16c, precision}}, - {{TensorDescCreatorTypes::nCsp16c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp16c, precision}}, + {{LayoutType::nCsp16c, precision}}, impl_type, supportDynamicBatch_); } } @@ -127,7 +127,8 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_SHCH_ERROR << "has unidentified preferable primitive descriptor"; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat(); + const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); int batchRank = axis_; int spatialRank = dataRank_ - axis_ - 1; @@ -135,7 +136,7 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { // 2 for decomposed axis dim, 1 for composed spatial dim int reshapedRank = batchRank + 2 + static_cast(spatialRank != 0) + static_cast(isBlocked && (spatialRank == 0)); PermuteParams params; - params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); params.order.resize(reshapedRank, 0); params.src_block_order.resize(reshapedRank); params.dst_block_order.resize(reshapedRank); @@ -158,9 +159,10 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { const int channelDim = 1; if (isBlocked) { - size_t blkSize = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims().back(); + const auto blkDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + size_t blkSize = blkDesc.getBlockDims().back(); size_t CB = div_up(inShape_[1], blkSize); - SizeVector srcBlockedDims = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); + SizeVector srcBlockedDims = blkDesc.getBlockDims(); if (axis_ > channelDim) { // axis on spatial for (int i = 0; i < batchRank; i++) { params.order[i] = i; @@ -179,7 +181,7 @@ void 
MKLDNNShuffleChannelsNode::createPrimitive() { params.order[2] = 2; params.src_block_dims[2] = spatialShapeSize; } - } else if (getParentEdgeAt(0)->getMemory().GetDesc().isTailCFormat()) { + } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { if (axis_ == channelDim) { // axis on channel params.order[0] = 0; params.src_block_dims[0] = inShape_[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp index 53dda785e69..9fe05e475fc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp @@ -7,6 +7,7 @@ #include #include #include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -37,19 +38,20 @@ void MKLDNNSoftMaxNode::getSupportedDescriptors() { if (!getChildEdges().size()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - if (getParentEdgeAt(0)->getDims().ndims() == 3) { - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::abc); - createDescriptor({in_candidate}, {}); + if (getParentEdgeAt(0)->getShape().getRank() == 3) { + MemoryDescPtr in_candidate = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::abc); + createDescriptor({in_candidate.get()}, {}); } - for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getDims())) { - MKLDNNDims dims = getParentEdgeAt(0)->getDims(); + for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { + const auto dims = getParentEdgeAt(0)->getShape().getStaticDims(); if (MKLDNNMemoryDesc(dims, inputDataType, format).blocksExtended()) continue; - MKLDNNMemoryDesc in_candidate(dims, inputDataType, format); + MemoryDescPtr in_candidate = MKLDNNPlugin::make_unique(dims, inputDataType, format); - createDescriptor({in_candidate}, {}); + createDescriptor({in_candidate.get()}, {}); } } @@ -63,7 +65,7 @@ void MKLDNNSoftMaxNode::createPrimitive() { descs[0] = desc; std::shared_ptr selected_desc_ptr = descs[0]; - const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor(); + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; @@ -92,33 +94,34 @@ bool MKLDNNSoftMaxNode::created() const { return getType() == Softmax; } -void MKLDNNSoftMaxNode::initOptimalPrimitiveDescriptor() { - auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; - auto config = selected_pd->getConfig(); - if (isInitConfig(config)) - return; + void MKLDNNSoftMaxNode::initOptimalPrimitiveDescriptor() { + auto selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; + auto config = selected_pd->getConfig(); + if (isConfigDefined(config)) + return; - if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || - (!isUninitTensorDesc(config.inConfs[0].desc) && - !isUninitTensorDesc(config.outConfs[0].desc) && config.inConfs[0].desc != config.outConfs[0].desc)) - IE_THROW() << "Layer " << getName() << " has incorrect selected config!"; + if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || + (config.inConfs[0].desc->isDefined() && + config.outConfs[0].desc->isDefined() && 
!config.inConfs[0].desc->isCompatible(*config.outConfs[0].desc))) + IE_THROW() << "Layer " << getName() << " has incorrect selected config!"; - if (!isUninitTensorDesc(config.inConfs[0].desc)) { - config.outConfs[0].desc = config.inConfs[0].desc; - } else if (!isUninitTensorDesc(config.outConfs[0].desc)) { - config.inConfs[0].desc = config.outConfs[0].desc; - } else { - config.outConfs[0].desc = config.inConfs[0].desc = getConfiguredInputDesc(config, 0); - } + if (config.inConfs[0].desc->isDefined()) { + config.outConfs[0].desc = config.inConfs[0].desc->clone(); + } else if (config.outConfs[0].desc->isDefined()) { + config.inConfs[0].desc = config.outConfs[0].desc->clone(); + } else { + config.inConfs[0].desc = getDefinedInputDesc(config, 0); + config.outConfs[0].desc = config.inConfs[0].desc->clone(); + } - initDescriptor(config); -} + initDescriptor(config); + } -void MKLDNNSoftMaxNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - MKLDNNMemoryDesc in_candidate(inputDesc[0]); +void MKLDNNSoftMaxNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); MKLDNNDescriptor desc(std::shared_ptr( new softmax_forward::desc(prop_kind::forward_scoring, in_candidate, axis))); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h index b422eb3f030..fd200cdb145 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h @@ -17,8 +17,8 @@ public: MKLDNNSoftMaxNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void initOptimalPrimitiveDescriptor() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void getSupportedDescriptors() override; void createPrimitive() override; bool created() const override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp index 4702f97e0fb..1861799f97c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp @@ -10,7 +10,7 @@ #include "utils/bfloat16.hpp" #include #include "mkldnn_space_to_batch_node.h" -#include +#include #include using namespace MKLDNNPlugin; @@ -67,32 +67,32 @@ void MKLDNNSpaceToBatchNode::initSupportedPrimitiveDescriptors() { if (supported_precision_sizes.find(precision.size()) == supported_precision_sizes.end()) IE_THROW() << errorPrefix << " has unsupported precision: " << precision.name(); - addSupportedPrimDesc({{TensorDescCreatorTypes::nspc, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nspc, precision}}, + addSupportedPrimDesc({{LayoutType::nspc, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nspc, precision}}, impl_desc_type::ref_any); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::ncsp, precision}}, + 
addSupportedPrimDesc({{LayoutType::ncsp, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); if (inDims[1] % 8 == 0) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp8c, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nCsp8c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp8c, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nCsp8c, precision}}, impl_desc_type::ref_any); } if (inDims[1] % 16 == 0) { - addSupportedPrimDesc({{TensorDescCreatorTypes::nCsp16c, precision}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}, - {TensorDescCreatorTypes::ncsp}}, - {{TensorDescCreatorTypes::nCsp16c, precision}}, + addSupportedPrimDesc({{LayoutType::nCsp16c, precision}, + {LayoutType::ncsp}, + {LayoutType::ncsp}, + {LayoutType::ncsp}}, + {{LayoutType::nCsp16c, precision}}, impl_desc_type::ref_any); } } @@ -112,15 +112,15 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto layout = getParentEdgeAt(0)->getDesc().getLayout(); - const bool blocked = layout != NCHW && layout != NCDHW && layout != NHWC && layout != NDHWC; + const bool blocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c); const auto dimsSize = inDims.size(); auto inShape5D = getShape5D(outDims); auto outShape5D = getShape5D(inDims); auto blockShape = getShape5D(blockShapeIn); - if (layout == NHWC || layout == NDHWC) { + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { inShape5D.push_back(inShape5D[1]); inShape5D.erase(inShape5D.begin() + 1); outShape5D.push_back(outShape5D[1]); @@ -129,9 +129,10 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { blockShape.erase(blockShape.begin() + 1); } - const size_t blockSize = blocked ? getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims().back() : 1lu; - const size_t blockCountInput = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; - const size_t blockCountOutput = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims()[1]; + const auto outBlkDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + const size_t blockSize = blocked ? outBlkDims.back() : 1lu; + const size_t blockCountInput = outBlkDims[1]; + const size_t blockCountOutput = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims()[1]; const auto blockRemainder = inShape5D[1] % blockSize; const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; @@ -172,7 +173,7 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - padsBeginIn[2] : 0lu; bIdx = dimsSize == 5 ? 
bIdx / blockShapeIn[2] : bIdx; oAdd[1] = bIdx % blockShapeIn[1] - padsBeginIn[1]; - if (layout == NHWC || layout == NDHWC) { + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { oAdd.push_back(oAdd[1]); oAdd.erase(oAdd.begin() + 1); } @@ -226,12 +227,13 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { } void MKLDNNSpaceToBatchNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(0)->getDesc().getPrecision().size()) { + switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size()) { case 1: SpaceToBatchKernel::value_type>(); break; case 2: SpaceToBatchKernel::value_type>(); break; case 4: SpaceToBatchKernel::value_type>(); break; default: - IE_THROW() << "SpaceToBatch layer does not support precision '" + std::string(getParentEdgeAt(0)->getDesc().getPrecision().name()) + "'"; + IE_THROW() << "SpaceToBatch layer does not support precision '" + std::string(getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name()) + + "'"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp index 69c3356a2f0..25003088139 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp @@ -6,7 +6,7 @@ #include #include -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include #include @@ -64,13 +64,13 @@ MKLDNNSpaceToDepthNode::MKLDNNSpaceToDepthNode(const std::shared_ptr 5) THROW_ERROR << "doesn't support dimensions with rank greater than 5"; - SizeVector dstDims = outDims[0].ToSizeVector(); + SizeVector dstDims = outputShapes[0].getStaticDims(); if (srcDims.size() != dstDims.size()) THROW_ERROR << "has incorrect number of input/output dimensions"; @@ -98,8 +98,8 @@ void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { return; InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); - auto srcDims = getParentEdgeAt(0)->getDims(); - const size_t nDims = srcDims.ndims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + const size_t nDims = srcDims.size(); impl_desc_type impl_type; if (mayiuse(impl::cpu::x64::avx512_common)) { @@ -112,7 +112,7 @@ void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.outConfs.resize(1); @@ -121,26 +121,26 @@ void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - std::vector supportedTypes; + std::vector supportedTypes; if (nDims > 2) { auto canUseBlocked = [=](const size_t block) { return srcDims[1] % block == 0 && (mode == Mode::DEPTH_FIRST ? 
block % blockStep == 0 : true); }; - supportedTypes.push_back(TensorDescCreatorTypes::nspc); + supportedTypes.push_back(LayoutType::nspc); if (canUseBlocked(8lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp8c); + supportedTypes.push_back(LayoutType::nCsp8c); if (canUseBlocked(16lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp16c); + supportedTypes.push_back(LayoutType::nCsp16c); } - supportedTypes.push_back(TensorDescCreatorTypes::ncsp); - auto creators = TensorDescCreator::getCommonCreators(); - auto range = TensorDescCreator::makeFilteredRange(creators, nDims, supportedTypes); + supportedTypes.push_back(LayoutType::ncsp); + auto creators = BlockedDescCreator::getCommonCreators(); + auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createDesc(precision, getParentEdgeAt(0)->getDims().ToSizeVector()); - config.outConfs[0].desc = itr->second->createDesc(precision, getChildEdgeAt(0)->getDims().ToSizeVector()); - supportedPrimitiveDescriptors.emplace_back(config, impl_type, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + config.inConfs[0].desc = itr->second->createUniqueDesc(precision, getParentEdgeAt(0)->getShape().getStaticDims()); + config.outConfs[0].desc = itr->second->createUniqueDesc(precision, getChildEdgeAt(0)->getShape().getStaticDims()); + supportedPrimitiveDescriptors.emplace_back(config, impl_type); } } @@ -154,18 +154,19 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "has unidentified preferable primitive descriptor"; - SizeVector srcDims = getParentEdgeAt(0)->getBlob()->getTensorDesc().getDims(); - SizeVector dstDims = getChildEdgeAt(0)->getBlob()->getTensorDesc().getDims(); + SizeVector srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + SizeVector dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); size_t nDims = srcDims.size(); const size_t nSpatialDims = nDims - 2; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().isBlockedCFormat(); + const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); const size_t reshapedRank = nDims + nSpatialDims + static_cast(isBlocked) + static_cast(isBlocked && mode == Mode::DEPTH_FIRST); const size_t lastIdx = reshapedRank - 1; size_t firstSpatialOrder = 2; PermuteParams params; - params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); params.order.resize(reshapedRank, 0); params.src_block_order.resize(reshapedRank); params.dst_block_order.resize(reshapedRank); @@ -190,8 +191,8 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { }; if (isBlocked) { - SizeVector srcBlockedDims = getParentEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); - SizeVector dstBlockedDims = getChildEdgeAt(0)->getDesc().getBlockingDesc().getBlockDims(); + SizeVector srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + SizeVector dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); size_t orderShiftForBlocks, orderShiftForDims; if (mode == Mode::BLOCKS_FIRST) { @@ -218,7 +219,7 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { } 
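Aside on the layout-candidate pattern repeated in the SpaceToBatch, SpaceToDepth and StridedSlice hunks: blocked layouts are only offered when the channel dimension divides the block size, channels-last needs rank above 2, and the plain layout is always kept. The sketch below reduces the checks to the channel-divisibility part (the per-node extra conditions such as the DEPTH_FIRST block step or the unit channel stride are omitted), and the local LayoutType enum is a stand-in for the plugin's type.

    #include <cstddef>
    #include <vector>

    enum class LayoutType { ncsp, nspc, nCsp8c, nCsp16c };   // stand-in for the plugin enum

    std::vector<LayoutType> candidateLayouts(size_t rank, size_t channels) {
        std::vector<LayoutType> types;
        if (rank > 2) {
            types.push_back(LayoutType::nspc);                // channels-last
            if (channels % 8 == 0)  types.push_back(LayoutType::nCsp8c);
            if (channels % 16 == 0) types.push_back(LayoutType::nCsp16c);
        }
        types.push_back(LayoutType::ncsp);                    // plain layout is always offered
        return types;
    }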
reshapeAndSetPermOrder(orderShiftForBlocks, orderShiftForDims, firstSpatialOrder, dstBlockedDims); - } else if (getParentEdgeAt(0)->getMemory().GetDesc().isTailCFormat()) { + } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { srcDims.push_back(srcDims[1]); dstDims.push_back(dstDims[1]); srcDims.erase(srcDims.begin() + 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp index 201bebf4e63..a95bd0c4f75 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp @@ -4,12 +4,13 @@ #include "mkldnn_split_node.h" #include "common/cpu_memcpy.h" -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include #include #include #include #include "utils/general_utils.h" +#include #define THROW_ERROR IE_THROW() << "Split layer with name '" << getName() <<"' " @@ -74,17 +75,17 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto srcDims = getParentEdgeAt(0)->getDims(); + auto srcShape = getParentEdgeAt(0)->getShape(); auto axis_size = 0; - auto dstFirstDims = getChildEdgeAt(0)->getDims(); - for (size_t i = 0; i < outDims.size(); i++) { - auto o_Dims = outDims[i]; - if (dstFirstDims.ndims() != o_Dims.ndims()) { + auto dstFirstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + for (size_t i = 0; i < outputShapes.size(); i++) { + auto o_Dims = outputShapes[i].getStaticDims(); + if (dstFirstDims.size() != o_Dims.size()) { THROW_ERROR << "only supports output blobs with equal number of dimensions"; } axis_size += o_Dims[axis]; - for (size_t j = 0; j < dstFirstDims.ndims(); j++) { + for (size_t j = 0; j < dstFirstDims.size(); j++) { if (j == axis) continue; if (o_Dims[j] != dstFirstDims[j]) @@ -92,7 +93,7 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { } } dstFirstDims[axis] = axis_size; - if (dstFirstDims.size() != srcDims.size()) + if (std::accumulate(dstFirstDims.begin(), dstFirstDims.end(), 1, std::multiplies()) != srcShape.getElementsCount()) THROW_ERROR << "sizes of input blob and sum of output blobs are not equal."; InferenceEngine::Precision inpPrecision = getOriginalInputPrecisionAtPort(0); @@ -105,18 +106,18 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { } //Set plain and tailC formats - std::vector tdCreatorTypes{ TensorDescCreatorTypes::ncsp, TensorDescCreatorTypes::nspc }; + std::vector tdCreatorTypes{ LayoutType::ncsp, LayoutType::nspc }; //Support channel blocked format - if (srcDims.ndims() > 2) { - for (auto item : { std::make_pair(8lu, TensorDescCreatorTypes::nCsp8c), std::make_pair(16lu, TensorDescCreatorTypes::nCsp16c) }) { - SizeVector blkDims = srcDims.ToSizeVector(); + if (srcShape.getRank() > 2) { + for (auto item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c) }) { + SizeVector blkDims = srcShape.getStaticDims(); if (blkDims[channelsPos] % item.first) continue; bool blocked = true; - for (size_t i = 0; i < outDims.size(); i++) { - if (outDims[i].ToSizeVector()[channelsPos] % item.first) { + for (size_t i = 0; i < outputShapes.size(); i++) { + if (outputShapes[i].getStaticDims()[channelsPos] % item.first) { blocked = false; break; } @@ -129,43 +130,37 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { std::vector pdIndexesToReuse; - auto& creatorsMap = TensorDescCreator::getCommonCreators(); - 
auto itrRange = TensorDescCreator::makeFilteredRange(creatorsMap, static_cast(srcDims.ndims()), tdCreatorTypes); + auto& creatorsMap = BlockedDescCreator::getCommonCreators(); + auto itrRange = BlockedDescCreator::makeFilteredRange(creatorsMap, static_cast(srcShape.getRank()), tdCreatorTypes); for (auto itr = itrRange.first; itr != itrRange.second; ++itr) { - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = dynBatchSupport; config.inConfs.resize(INPUTS_NUM); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = itr->second->createDesc(inpPrecision, srcDims.ToSizeVector()); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(inpPrecision, srcShape.getStaticDims())); config.inConfs[1].inPlace = -1; config.inConfs[1].constant = true; - config.inConfs[1].desc.setDims({1}); - config.inConfs[1].desc.setPrecision(axisPrecision); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{1}); if (INPUTS_NUM == 3) { - config.inConfs[2].desc = TensorDesc(axisPrecision, SizeVector{outDims.size()}, TensorDesc::getLayoutByDims(SizeVector{outDims.size()})); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{outputShapes.size()}); config.inConfs[2].constant = true; } - config.outConfs.resize(outDims.size()); - - std::vector outFormats; - - for (size_t i = 0; i < outDims.size(); i++) { - auto o_Dims = outDims[i]; + config.outConfs.resize(outputShapes.size()); + for (size_t i = 0; i < outputShapes.size(); i++) { config.outConfs[i].inPlace = -1; config.outConfs[i].constant = false; - config.outConfs[i].desc = itr->second->createDesc(inpPrecision, o_Dims.ToSizeVector()); - outFormats.push_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat()); + config.outConfs[i].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(inpPrecision, outputShapes[i].getStaticDims())); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, outFormats); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); - if (itr->first == TensorDescCreatorTypes::ncsp) { + if (itr->first == LayoutType::ncsp) { // at least the plain layout can be optimized inplace. 
pdIndexesToReuse.emplace_back(supportedPrimitiveDescriptors.size() - 1); - } else if (itr->first == TensorDescCreatorTypes::nCsp8c || itr->first == TensorDescCreatorTypes::nCsp16c) { + } else if (itr->first == LayoutType::nCsp8c || itr->first == LayoutType::nCsp16c) { if (axis < 2) { pdIndexesToReuse.emplace_back(supportedPrimitiveDescriptors.size() - 1); } @@ -176,12 +171,11 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { for (auto refPdIndex : pdIndexesToReuse) { const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); auto config = refConfig; - - const auto& order = refConfig.inConfs[0].desc.getBlockingDesc().getOrder(); - const auto& blkDims = refConfig.inConfs[0].desc.getBlockingDesc().getBlockDims(); + const auto inBlockingDesc = refConfig.inConfs[0].desc->as(); + const auto& order = inBlockingDesc->getOrder(); + const auto& blkDims = inBlockingDesc->getBlockDims(); auto numOfDim = blkDims.size(); - std::vector outFormats; SizeVector offsets(numOfDim, 0lu); SizeVector strides(numOfDim); strides.back() = 1lu; @@ -195,49 +189,43 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { } } - config.inConfs[0].desc = TensorDesc(inpPrecision, srcDims.ToSizeVector(), {blkDims, order, offset, offsets, strides}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(inpPrecision, srcShape.getStaticDims(), blkDims, order, offset, offsets, strides); - for (size_t i = 0; i < outDims.size(); i++) { - const auto& outBlkDims = refConfig.outConfs[i].desc.getBlockingDesc().getBlockDims(); - const auto& dims = refConfig.outConfs[i].desc.getDims(); + for (size_t i = 0; i < outputShapes.size(); i++) { + auto outBlockingDesc = refConfig.outConfs[i].desc->as(); + const auto& outBlkDims = outBlockingDesc->getBlockDims(); + const auto& dims = outBlockingDesc->getShape().getStaticDims(); config.outConfs[i].inPlace = 0; - config.outConfs[i].desc = TensorDesc(outPrecision, dims, {outBlkDims, order, offset, offsets, strides}); - outFormats.emplace_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat()); + config.outConfs[i].desc = MKLDNNPlugin::make_unique(outPrecision, dims, outBlkDims, order, offset, offsets, strides); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } // Special nspc -> ncsp case when splitting channels - if (axis == 1 && (dstFirstDims.ndims() == 4 || dstFirstDims.ndims() == 5)) { - InferenceEngine::LayerConfig config; + if (axis == 1 && (dstFirstDims.size() == 4 || dstFirstDims.size() == 5)) { + NodeConfig config; config.dynBatchSupport = dynBatchSupport; config.inConfs.resize(INPUTS_NUM); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = creatorsMap.at(TensorDescCreatorTypes::nspc)->createDesc(inpPrecision, srcDims.ToSizeVector()); + config.inConfs[0].desc = creatorsMap.at(LayoutType::nspc)->createUniqueDesc(inpPrecision, srcShape.getStaticDims()); config.inConfs[1].inPlace = -1; config.inConfs[1].constant = true; - config.inConfs[1].desc.setDims({1}); - config.inConfs[1].desc.setPrecision(axisPrecision); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{1}); if (INPUTS_NUM == 3) { - config.inConfs[2].desc = TensorDesc(axisPrecision, SizeVector{outDims.size()}, TensorDesc::getLayoutByDims(SizeVector{outDims.size()})); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{outputShapes.size()}); config.inConfs[2].constant = 
true; } - config.outConfs.resize(outDims.size()); - - std::vector outFormats; - - for (size_t i = 0; i < outDims.size(); i++) { - auto o_Dims = outDims[i]; + config.outConfs.resize(outputShapes.size()); + for (size_t i = 0; i < outputShapes.size(); i++) { config.outConfs[i].inPlace = -1; config.outConfs[i].constant = false; - config.outConfs[i].desc = creatorsMap.at(TensorDescCreatorTypes::ncsp)->createDesc(inpPrecision, o_Dims.ToSizeVector()); - outFormats.push_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat()); + config.outConfs[i].desc = creatorsMap.at(LayoutType::ncsp)->createUniqueDesc(inpPrecision, outputShapes[i].getStaticDims()); } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, outFormats); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); } } @@ -252,18 +240,16 @@ void MKLDNNSplitNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "Preferable primitive descriptor is not set."; - canUseOptimizedNspc2Ncsp = true; - if (axis != 1) - canUseOptimizedNspc2Ncsp = false; + auto& memDesc = getParentEdgeAt(0)->getMemoryPtr()->GetDesc(); - if (getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NHWC && - getParentEdgeAt(0)->getBlob()->getTensorDesc().getLayout() != NDHWC) - canUseOptimizedNspc2Ncsp = false; - - for (size_t i = 0; i < getChildEdges().size(); i++) { - if (getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCHW && - getChildEdgeAt(i)->getBlob()->getTensorDesc().getLayout() != NCDHW) - canUseOptimizedNspc2Ncsp = false; + canUseOptimizedNspc2Ncsp = false; + if (axis == 1 && one_of(memDesc.getShape().getRank(), 4, 5) && memDesc.hasLayoutType(LayoutType::nspc)) { + canUseOptimizedNspc2Ncsp = true; + for (size_t i = 0; i < getChildEdges().size(); i++) { + auto& childMemDesc = getChildEdgeAt(i)->getMemoryPtr()->GetDesc(); + if (!childMemDesc.hasLayoutType(LayoutType::ncsp)) + canUseOptimizedNspc2Ncsp = false; + } } if (!isOptimized()) { @@ -288,7 +274,7 @@ void MKLDNNSplitNode::execute(mkldnn::stream strm) { } uint8_t* srcData = reinterpret_cast(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - size_t batch = this->getParentEdgeAt(0)->getDims()[0]; + size_t batch = this->getParentEdgeAt(0)->getShape().getStaticDims()[0]; if (batch != MB) optimizedParams.countStrides = optimizedParams.countStrides / batch * MB; @@ -320,50 +306,47 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) THROW_ERROR << "Preferable primitive descriptor is not set."; auto config = selected_pd->getConfig(); - if (isInitConfig(config)) + if (isConfigDefined(config)) return; for (size_t i = 0; i < config.inConfs.size(); i++) { - if (config.inConfs[i].desc.getLayout() == InferenceEngine::Layout::ANY || - !isUninitTensorDesc(config.inConfs[i].desc)) + if (config.inConfs[i].desc->isDefined()) continue; int num = getParentEdgeAt(i)->getOutputNum(); if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) { if (num >= 0) { - if (isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) && - getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].inPlace >= 0) + const auto& parentConfig = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num]; + if (!parentConfig.desc->isDefined() && parentConfig.inPlace >= 0) getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor(); - if 
(!isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) && - MKLDNNExtensionUtils::initTensorsAreEqual( - getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc, - config.inConfs[i].desc)) { - config.inConfs[i].desc = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc; + if (parentConfig.desc->isDefined() && parentConfig.desc->isCompatible(*config.inConfs[i].desc)) { + config.inConfs[i].desc = parentConfig.desc->clone(); continue; } } } - config.inConfs[i].desc = InferenceEngine::TensorDesc(config.inConfs[i].desc.getPrecision(), - config.inConfs[i].desc.getDims(), { - config.inConfs[i].desc.getBlockingDesc().getBlockDims(), - config.inConfs[i].desc.getBlockingDesc().getOrder() - }); + + // reset undefined offsets + config.inConfs[i].desc = MemoryDescUtils::resetOffset(config.inConfs[i].desc.get()); } - if (config.outConfs.size() != outDims.size()) + if (config.outConfs.size() != outputShapes.size()) THROW_ERROR << "has invalid config"; + + auto firstInBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[0].desc); size_t offset = 0; - for (size_t i = 0; i < outDims.size(); i++) { - config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(), - config.outConfs[i].desc.getDims(), { - config.outConfs[i].desc.getBlockingDesc().getBlockDims(), - config.outConfs[i].desc.getBlockingDesc().getOrder(), - config.inConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset, - config.inConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(), - config.inConfs[0].desc.getBlockingDesc().getStrides() - }); + for (size_t i = 0; i < outputShapes.size(); i++) { + auto outBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.outConfs[i].desc); + config.outConfs[i].desc = MKLDNNPlugin::make_unique(outBlockingDesc.getPrecision(), + outBlockingDesc.getShape().getStaticDims(), + outBlockingDesc.getBlockDims(), + outBlockingDesc.getOrder(), + firstInBlockingDesc.getOffsetPadding() + offset, + firstInBlockingDesc.getOffsetPaddingToData(), + firstInBlockingDesc.getStrides()); + size_t axisSize = 1; - for (size_t j = axis; j < config.outConfs[i].desc.getBlockingDesc().getBlockDims().size(); j++) { - axisSize *= config.outConfs[i].desc.getBlockingDesc().getBlockDims()[j]; + for (size_t j = axis; j < outBlockingDesc.getBlockDims().size(); j++) { + axisSize *= outBlockingDesc.getBlockDims()[j]; } offset += axisSize; } @@ -375,10 +358,9 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { // This is needed mostly for the testing purposes, since for the planar layout Split works always in place, we need to enforce // the reference implementation when it is selected in a test to test that piece of code. 
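As an aside on the offset arithmetic in MKLDNNSplitNode::initOptimalPrimitiveDescriptor above: a small sketch, under the assumption that all output views share the parent's strides, of how each output's starting offset grows by the product of the block dims from the split axis inward. The helper is hypothetical and only mirrors the accumulation done in the hunk.

    #include <cstddef>
    #include <functional>
    #include <numeric>
    #include <vector>

    std::vector<size_t> splitOffsets(const std::vector<std::vector<size_t>>& outBlockDims, size_t axis) {
        std::vector<size_t> offsets;
        size_t offset = 0;
        for (const auto& dims : outBlockDims) {
            offsets.push_back(offset);                         // where this output starts in the parent buffer
            offset += std::accumulate(dims.begin() + axis, dims.end(),
                                      static_cast<size_t>(1), std::multiplies<size_t>());
        }
        return offsets;
    }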
if (!implPriorities.empty() && implPriorities[0] == impl_desc_type::ref) { - auto plain = PartialBlkDesc::makePlain(getParentEdgeAt(0)->getDims().ToSizeVector()); for (size_t i = 0; i < supportedPrimitiveDescriptors.size(); ++i) { auto& pd = supportedPrimitiveDescriptors[i]; - if (PartialBlkDesc::extractFrom(pd.getConfig().inConfs[0].desc) == plain && + if (pd.getConfig().inConfs[0].desc->hasLayoutType(LayoutType::ncsp) && impl_desc_type::ref == pd.getImplementationType()) { selectPrimitiveDescriptorByIndex(static_cast(i)); return; @@ -399,9 +381,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { if (inNum < 0 || inNum >= parent_spd->getConfig().outConfs.size()) { inNum = 0; } - if (MKLDNNExtensionUtils::initTensorsAreEqual( - supportedPrimitiveDescriptors[i].getConfig().inConfs[0].desc, - parent_spd->getConfig().outConfs[inNum].desc)) { + if (supportedPrimitiveDescriptors[i].getConfig().inConfs[0].desc->isCompatible(*parent_spd->getConfig().outConfs[inNum].desc)) { canSelectPrimitive.push_back(i); } } @@ -425,7 +405,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { auto childEdge = getChildEdgeAt(i); auto childPtr = childEdge->getChild(); auto& vecChildSpd = childPtr->getSupportedPrimitiveDescriptors(); - const auto& outputDesc = supportedPrimitiveDescriptors[indx].getConfig().outConfs[i].desc; + const auto& outputDesc = supportedPrimitiveDescriptors[indx].getConfig().outConfs[childEdge->getInputNum()].desc; if (!vecChildSpd.empty()) { int inNum = childEdge->getOutputNum(); @@ -437,7 +417,7 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { if (inNum >= childSpd.getConfig().inConfs.size()) { inNum = 0; } - if (MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, childSpd.getConfig().inConfs[inNum].desc)) { + if (outputDesc->isCompatible(*childSpd.getConfig().inConfs[inNum].desc)) { hasMatchDesc = true; break; } @@ -480,11 +460,11 @@ void MKLDNNSplitNode::prepareOptimizedParams() { auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU Split node with name '" << getName() << "' doesn't have primitive descriptors."; - const auto& inpTensorDesc = selectedPrimitiveDescriptor->getConfig().inConfs[0].desc; - const auto outputPortsCount = outDims.size(); + const auto inpTensorDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + const auto outputPortsCount = outputShapes.size(); //find axis order position - const auto& order = inpTensorDesc.getBlockingDesc().getOrder(); + const auto& order = inpTensorDesc.getOrder(); unsigned axisOrderPos = std::numeric_limits::max(); for (size_t i = 0; i < order.size(); ++i) { if (order[i] == axis) { @@ -497,8 +477,8 @@ void MKLDNNSplitNode::prepareOptimizedParams() { } uint8_t srcDataSize = inpTensorDesc.getPrecision().size(); - const auto& srcDims = inpTensorDesc.getBlockingDesc().getBlockDims(); - const auto nDims = srcDims.size(); + const auto& srcDims = inpTensorDesc.getBlockDims(); + const auto getRank = srcDims.size(); optimizedParams.countStrides = 1; for (int i = 0; i < axisOrderPos; i++) @@ -511,8 +491,9 @@ void MKLDNNSplitNode::prepareOptimizedParams() { auto outputEdge = this->getChildEdgesAtPort(i).front(); optimizedParams.dataSize[i] = srcDataSize; - for (size_t j = axisOrderPos; j < nDims; j++) - optimizedParams.dataSize[i] *= outputEdge->getDesc().getBlockingDesc().getBlockDims()[j]; + auto desc = outputEdge->getMemory().GetDesc().as(); + for (size_t j = axisOrderPos; j < getRank; j++) + optimizedParams.dataSize[i] *= 
desc->getBlockDims()[j]; optimizedParams.srcDataStride += optimizedParams.dataSize[i]; } @@ -526,31 +507,32 @@ void MKLDNNSplitNode::prepareOptimizedParams() { void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { auto parentEdge = getParentEdgeAt(0); - const int ndims = parentEdge->getDims().ndims(); - const size_t IC = parentEdge->getDims()[1]; - const size_t D = ndims == 5 ? parentEdge->getDims()[ndims - 3] : 1; - const size_t H = parentEdge->getDims()[ndims - 2]; - const size_t W = parentEdge->getDims()[ndims - 1]; + const int rank = parentEdge->getShape().getRank(); + const auto parentDims = parentEdge->getShape().getStaticDims(); + const size_t IC = parentDims[1]; + const size_t D = rank == 5 ? parentDims[rank - 3] : 1; + const size_t H = parentDims[rank - 2]; + const size_t W = parentDims[rank - 1]; - auto srcBlob = parentEdge->getBlob(); - auto srcData = srcBlob->cbuffer().as(); - const auto dataSize = srcBlob->getTensorDesc().getPrecision().size(); + auto& srcMem = parentEdge->getMemory(); + auto srcData = reinterpret_cast(srcMem.GetData()); + const auto dataSize = srcMem.GetDesc().getPrecision().size(); const size_t DHW = D*H*W; const size_t strideIB = DHW * IC * dataSize; const size_t strideIW = IC*dataSize; const size_t strideOC = DHW * dataSize; - for (size_t i = 0, sIdx = 0; i < outDims.size(); i++) { + for (size_t i = 0, sIdx = 0; i < outputShapes.size(); i++) { auto dstData = dstMemPtrs[i]; size_t innerSize = 1; - auto dims = outDims[i].ToSizeVector(); + auto dims = outputShapes[i].getStaticDims(); for (size_t j = axis; j < dims.size(); j++) { innerSize *= dims[j]; } - auto srcPtr = srcData + srcBlob->getTensorDesc().offset(sIdx) * dataSize; + auto srcPtr = srcData + srcMem.GetDesc().getElementOffset(sIdx) * dataSize; const size_t OC = dims[1]; const size_t strideOB = OC * strideOC; @@ -572,7 +554,7 @@ void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { void MKLDNNSplitNode::initializeDstMemPtrs() { dstMemPtrs.clear(); - for (size_t i = 0; i < outDims.size(); ++i) { + for (size_t i = 0; i < outputShapes.size(); ++i) { auto outputEdges = this->getChildEdgesAtPort(i); if (uint8_t* dstData = reinterpret_cast(outputEdges.front()->getMemoryPtr()->GetPtr())) { dstMemPtrs.push_back(dstData); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp index 1b70de9f0f8..4f98fc1099f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp @@ -10,7 +10,7 @@ #include "ie_parallel.hpp" #include "caseless.hpp" #include "common/cpu_memcpy.h" -#include "common/tensor_desc_creator.h" +#include "common/blocked_desc_creator.h" #include "utils/general_utils.h" #include "mkldnn_input_node.h" @@ -54,7 +54,7 @@ MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const std::shared_ptr(op); - const size_t nDims = std::max(inDims[DATA_ID].ndims(), outDims[0].ndims()); + const size_t nDims = std::max(inputShapes[DATA_ID].getRank(), outputShapes[0].getRank()); auto createMask = [&](const std::vector &origMask, const int bit = 0, bool needReverse = false) { std::vector mask(origMask.begin(), origMask.end()); @@ -92,8 +92,8 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() { params.parametersAreConstant = isConstantNode(getParentEdgesAtPort(BEGIN_ID)[0]->getParent()) && isConstantNode(getParentEdgesAtPort(END_ID)[0]->getParent()); - const SizeVector srcDims = inDims[DATA_ID].ToSizeVector(); - const 
SizeVector dstDims = outDims[0].ToSizeVector(); + const SizeVector srcDims = inputShapes[DATA_ID].getStaticDims(); + const SizeVector dstDims = outputShapes[0].getStaticDims(); const size_t nSrcDims = srcDims.size(); const size_t nDims = std::max(nSrcDims, dstDims.size()); @@ -102,21 +102,21 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() { if (!getChildEdges().size()) THROW_ERROR << "has incorrect number of output edges"; - beginDims = inDims[BEGIN_ID].ToSizeVector(); + beginDims = inputShapes[BEGIN_ID].getStaticDims(); if (beginDims.size() != 1) THROW_ERROR << " should have begin vector with 1 dimension"; - endDims = inDims[END_ID].ToSizeVector(); + endDims = inputShapes[END_ID].getStaticDims(); if (endDims.size() != 1) THROW_ERROR << "should have end vector with 1 dimension"; if (beginDims[0] != endDims[0]) THROW_ERROR << "should have begin vector with size equal to end vector size"; - if (inDims.size() > STRIDE_ID) { + if (inputShapes.size() > STRIDE_ID) { if (!isConstantNode(getParentEdgesAtPort(STRIDE_ID)[0]->getParent())) params.parametersAreConstant = false; - strideDims = inDims[STRIDE_ID].ToSizeVector(); + strideDims = inputShapes[STRIDE_ID].getStaticDims(); if (strideDims.size() > 1) THROW_ERROR << "should have stride vector with 1 dimension"; if (beginDims[0] != strideDims[0]) @@ -206,11 +206,11 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() { if (hasStrides) stridePrecision = getOriginalInputPrecisionAtPort(STRIDE_ID); - auto srcDims = getParentEdgeAt(DATA_ID)->getDims(); - auto dstDims = getChildEdgeAt(0)->getDims(); - size_t nDims = srcDims.ndims(); + auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + size_t nDims = srcDims.size(); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(getParentEdges().size()); config.inConfs[DATA_ID].inPlace = -1; @@ -225,33 +225,35 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() { } config.outConfs.resize(1); - std::vector supportedTypes; + std::vector supportedTypes; if (nDims > 2 && params.equalDims) { auto canUseBlocked = [=](const size_t blockSize) { return srcDims[1] % blockSize == 0 && abs(stride[1]) == 1 && (begin[1] > srcDims[1] || begin[1] % blockSize == 0); }; - supportedTypes.push_back(TensorDescCreatorTypes::nspc); + supportedTypes.push_back(LayoutType::nspc); if (canUseBlocked(8lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp8c); + supportedTypes.push_back(LayoutType::nCsp8c); if (canUseBlocked(16lu)) - supportedTypes.push_back(TensorDescCreatorTypes::nCsp16c); + supportedTypes.push_back(LayoutType::nCsp16c); } - supportedTypes.push_back(TensorDescCreatorTypes::ncsp); - auto creators = TensorDescCreator::getCommonCreators(); - auto range = TensorDescCreator::makeFilteredRange(creators, nDims, supportedTypes); + supportedTypes.push_back(LayoutType::ncsp); + auto creators = BlockedDescCreator::getCommonCreators(); + auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createDesc(dataPrecision, getParentEdgeAt(DATA_ID)->getDims().ToSizeVector()); - config.inConfs[BEGIN_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(BEGIN_ID)->getDims(), beginDataType, mkldnn::memory::format_tag::x); - config.inConfs[END_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(END_ID)->getDims(), endDataType, 
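The blocked nCsp8c/nCsp16c layouts above are offered only when the slice cannot cut through a channel block. A standalone restatement of the canUseBlocked predicate, with the surrounding variables turned into parameters (names are illustrative):

    #include <cstddef>
    #include <cstdlib>
    #include <vector>

    // A blocked layout is usable when the channel dim is a multiple of the block size,
    // the channel stride is 1, and the begin offset is either past the channel dim
    // or block-aligned.
    bool canUseBlocked(const std::vector<std::size_t>& srcDims,
                       const std::vector<int>& begin,
                       const std::vector<int>& stride,
                       std::size_t blockSize) {
        return srcDims[1] % blockSize == 0 &&
               std::abs(stride[1]) == 1 &&
               (begin[1] > static_cast<int>(srcDims[1]) || begin[1] % blockSize == 0);
    }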
mkldnn::memory::format_tag::x); + config.inConfs[0].desc = itr->second->createUniqueDesc(dataPrecision, getParentEdgeAt(DATA_ID)->getShape().getStaticDims()); + config.inConfs[BEGIN_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(BEGIN_ID)->getShape().getStaticDims(), beginDataType, + mkldnn::memory::format_tag::x); + config.inConfs[END_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(END_ID)->getShape().getStaticDims(), endDataType, + mkldnn::memory::format_tag::x); if (hasStrides) - config.inConfs[STRIDE_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(STRIDE_ID)->getDims(), + config.inConfs[STRIDE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(STRIDE_ID)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(stridePrecision), mkldnn::memory::format_tag::x); - config.outConfs[0].desc = itr->second->createDesc(dataPrecision, getChildEdgeAt(DATA_ID)->getDims().ToSizeVector()); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat()); + config.outConfs[0].desc = itr->second->createUniqueDesc(dataPrecision, getChildEdgeAt(DATA_ID)->getShape().getStaticDims()); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); } } @@ -265,16 +267,16 @@ void MKLDNNStridedSliceNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "has unidentified preferable primitive descriptor."; - auto srcBlockingDesc = getParentEdgeAt(DATA_ID)->getDesc().getBlockingDesc(); - auto dstBlockingDesc = getChildEdgeAt(0)->getDesc().getBlockingDesc(); + auto srcBlockingDesc = getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType(); + auto dstBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); auto srcOrder = srcBlockingDesc.getOrder(); params.srcDims = srcBlockingDesc.getBlockDims(); params.dstDims = dstBlockingDesc.getBlockDims(); - params.dataSize = getSelectedPrimitiveDescriptor()->getConfig().inConfs[DATA_ID].desc.getPrecision().size(); + params.dataSize = getSelectedPrimitiveDescriptor()->getConfig().inConfs[DATA_ID].desc->getPrecision().size(); if (params.parametersAreConstant) { size_t realNDims = params.dstDims.size(); - if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().isPlainFormat()) + if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) orderParametersByLayouts(); SizeVector newSrcDims, newDstDims; @@ -287,9 +289,10 @@ void MKLDNNStridedSliceNode::createPrimitive() { } void MKLDNNStridedSliceNode::orderParametersByLayouts() { - const bool isPerChannelLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().isTailCFormat(); - const bool isBlockedLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().isBlockedCFormat(); - auto srcOrder = getParentEdgeAt(DATA_ID)->getDesc().getBlockingDesc().getOrder(); + const bool isPerChannelLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc); + const bool isBlockedLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); + auto srcOrder = getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType().getOrder(); if (isBlockedLayout) { const size_t blk = params.srcDims.back(); @@ -553,9 +556,9 @@ void MKLDNNStridedSliceNode::indicesCalculation() { void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) { if (!params.parametersAreConstant) { - auto srcDims = getParentEdgeAt(DATA_ID)->getDims(); - auto dstDims = 
getChildEdgeAt(0)->getDims(); - const size_t nDims = std::max(srcDims.ndims(), dstDims.ndims()); + auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + const size_t nDims = std::max(srcDims.size(), dstDims.size()); const size_t ellipsisMaskCounter = std::accumulate(ellipsisMask.begin(), ellipsisMask.end(), 0); auto fillingInParameters = [&](std::vector ¶meter, const size_t type, const size_t size, const int value) { @@ -574,15 +577,15 @@ void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) { if (strideDims.size()) fillingInParameters(stride, STRIDE_ID, strideDims[0], 1); - if (srcDims.ndims() > 3 && params.equalDims && ellipsisMaskCounter != 0) - addHiddenDims(srcDims.ndims()); + if (srcDims.size() > 3 && params.equalDims && ellipsisMaskCounter != 0) + addHiddenDims(srcDims.size()); - if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().isPlainFormat()) + if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) orderParametersByLayouts(); SizeVector newSrcDims, newDstDims; dimsNormalization(newSrcDims, newDstDims); - dimsGluing(dstDims.ndims(), newSrcDims, newDstDims); + dimsGluing(dstDims.size(), newSrcDims, newDstDims); if (params.dstDims.size() == 1 || params.nDimsForWork != 1) indicesCalculation(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp index d1d80e1b7cb..2e1a9f426ef 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp @@ -10,6 +10,7 @@ #include #include #include +#include "common/blocked_desc_creator.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -17,15 +18,16 @@ using namespace InferenceEngine::details; namespace MKLDNNPlugin { -static InferenceEngine::LayerConfig make_plain_config(const std::shared_ptr& op) { - InferenceEngine::LayerConfig config; +static NodeConfig make_plain_config(const std::shared_ptr& op) { + NodeConfig config; for (size_t i = 0; i < op->get_input_size(); i++) { const auto& dims = op->get_input_shape(i); const auto prec = InferenceEngine::details::convertPrecision(op->get_input_element_type(i)); - InferenceEngine::DataConfig data_conf {}; - data_conf.desc = InferenceEngine::TensorDesc { prec, dims, InferenceEngine::TensorDesc::getLayoutByDims(dims) }; + PortConfig data_conf {}; + auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); + data_conf.desc = descCreator->createUniqueDesc(prec, dims); config.inConfs.push_back(data_conf); } @@ -33,8 +35,9 @@ static InferenceEngine::LayerConfig make_plain_config(const std::shared_ptrget_output_shape(i); const auto prec = InferenceEngine::details::convertPrecision(op->get_output_element_type(i)); - InferenceEngine::DataConfig data_conf {}; - data_conf.desc = InferenceEngine::TensorDesc { prec, dims, InferenceEngine::TensorDesc::getLayoutByDims(dims) }; + PortConfig data_conf {}; + auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); + data_conf.desc = descCreator->createUniqueDesc(prec, dims); config.outConfs.push_back(data_conf); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h index 32e5eac70b2..3ba49ae9ad9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h @@ -95,7 +95,7 @@ private: int loopTripCountIdx = -1; int loopExecutionConditionIdx = -1; - InferenceEngine::LayerConfig config; + NodeConfig config; const std::shared_ptr ngraphOp; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp index 663f3a376f8..c92193c6e92 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp @@ -85,20 +85,18 @@ void MKLDNNTileNode::initSupportedPrimitiveDescriptors() { precision.size() != sizeof(PrecisionTrait::value_type)) { IE_THROW() << errorPrefix << " has unsupported input precision: " << precision; } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto& inDims = getParentEdgeAt(0)->getDims(); - memory::format_tag fmt = MKLDNNMemory::GetPlainFormat(inDims); + auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(2); config.outConfs.resize(1); - config.inConfs[TILE_INPUT].desc = MKLDNNMemoryDesc(getParentEdgeAt(TILE_INPUT)->getDims(), inputDataType, fmt); - config.inConfs[TILE_REPEATS].desc = MKLDNNMemoryDesc(getParentEdgeAt(TILE_REPEATS)->getDims(), memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), inputDataType, fmt); + config.inConfs[TILE_INPUT].desc = descCreator->createUniqueDesc(precision, getParentEdgeAt(TILE_INPUT)->getShape().getStaticDims()); + config.inConfs[TILE_REPEATS].desc = descCreator->createUniqueDesc(Precision::I32, getParentEdgeAt(TILE_REPEATS)->getShape().getStaticDims()); + config.outConfs[0].desc = descCreator->createUniqueDesc(precision, getChildEdgeAt(0)->getShape().getStaticDims()); config.outConfs[0].inPlace = noTiling ? 
0 : -1; - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, fmt}); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } void MKLDNNTileNode::createPrimitive() { @@ -135,13 +133,13 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) { m_inner_dim *= batchToProcess(); } - if (m_inner_dim == 1 && m_outer_dim % 8 == 0 && srcMemory.GetDesc().isBlockedCFormat(8)) { + if (m_inner_dim == 1 && m_outer_dim % 8 == 0 && srcMemory.GetDesc().hasLayoutType(LayoutType::nCsp8c)) { /* * We may enable tile processing directly to appropriate output format (nChw8c) */ m_inner_dim *= 8; m_outer_dim /= 8; - } else if (m_inner_dim == 1 && m_outer_dim % 16 == 0 && srcMemory.GetDesc().isBlockedCFormat(16)) { + } else if (m_inner_dim == 1 && m_outer_dim % 16 == 0 && srcMemory.GetDesc().hasLayoutType(LayoutType::nCsp16c)) { /* * We may enable tile processing directly to appropriate output format (nChw16c) */ @@ -149,7 +147,7 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) { m_outer_dim /= 16; } - m_inner_dim *= srcMemory.GetDesc().GetElementSize(); + m_inner_dim *= srcMemory.GetDesc().getPrecision().size(); for (int i = 0; i < m_outer_dim; ++i) { for (int t = 0; t < tiles; ++t) { cpu_memcpy(dst_ptr, src_ptr, m_inner_dim); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp index 1c78c44b48d..f3fa2e69b5f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp @@ -84,14 +84,14 @@ void MKLDNNTopKNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - std::vector outDataConf; + std::vector outDataConf; outDataConf.reserve(getOriginalOutputsNumber()); - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::FP32); + outDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); for (int i = 1; i < getOriginalOutputsNumber(); ++i) - outDataConf.emplace_back(TensorDescCreatorTypes::ncsp, Precision::I32); + outDataConf.emplace_back(LayoutType::ncsp, Precision::I32); - addSupportedPrimDesc({{TensorDescCreatorTypes::ncsp, Precision::FP32}, - {TensorDescCreatorTypes::ncsp, Precision::I32}}, + addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}}, outDataConf, impl_desc_type::ref_any); } @@ -102,24 +102,24 @@ void MKLDNNTopKNode::execute(mkldnn::stream strm) { float* dst_data = nullptr; int* dst_idx = nullptr; - if (outDims.size() == 1) { + if (outputShapes.size() == 1) { if (getOriginalOutputPrecisionAtPort(0) == Precision::FP32) { dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); } else { dst_idx = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); } - SizeVector dstDims = getChildEdgesAtPort(0)[0]->getDims().ToSizeVector(); + SizeVector dstDims = getChildEdgesAtPort(0)[0]->getShape().getStaticDims(); if (dstDims[axis] != static_cast(src_k)) { std::string errorMsg = "Output tensor dimension mismatch"; IE_THROW() << errorMsg; } - } else if (outDims.size() == 2) { + } else if (outputShapes.size() == 2) { dst_data = reinterpret_cast(getChildEdgesAtPort(TOPK_VALUE)[0]->getMemoryPtr()->GetPtr()); - SizeVector dst_data_dims = getChildEdgesAtPort(TOPK_VALUE)[0]->getDims().ToSizeVector(); + SizeVector dst_data_dims = getChildEdgesAtPort(TOPK_VALUE)[0]->getShape().getStaticDims(); dst_idx = 
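The Tile fast path above widens the memcpy chunk to a whole channel block when the source is nCsp8c or nCsp16c, then scales it by the element size. A small self-contained sketch of that bookkeeping with made-up sizes:

    #include <cstddef>
    #include <iostream>

    int main() {
        std::size_t m_inner_dim = 1;         // product of dims after the tiled axis
        std::size_t m_outer_dim = 64;        // product of dims up to the tiled axis
        const std::size_t blockSize = 8;     // nCsp8c
        const std::size_t prcSize = sizeof(float);

        if (m_inner_dim == 1 && m_outer_dim % blockSize == 0) {
            m_inner_dim *= blockSize;        // copy whole channel blocks at once
            m_outer_dim /= blockSize;
        }
        m_inner_dim *= prcSize;              // bytes per memcpy chunk
        std::cout << m_outer_dim << " chunks of " << m_inner_dim << " bytes\n";  // 8 chunks of 32 bytes
    }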
reinterpret_cast(getChildEdgesAtPort(TOPK_INDEX)[0]->getMemoryPtr()->GetPtr()); - SizeVector dst_idx_dims = getChildEdgesAtPort(TOPK_INDEX)[0]->getDims().ToSizeVector(); + SizeVector dst_idx_dims = getChildEdgesAtPort(TOPK_INDEX)[0]->getShape().getStaticDims(); if (dst_idx_dims[axis] != static_cast(src_k) || dst_data_dims[axis] != static_cast(src_k)) { std::string errorMsg = "Output tensors dimension mismatch"; @@ -133,7 +133,7 @@ void MKLDNNTopKNode::execute(mkldnn::stream strm) { if (src_dims[axis] < static_cast(src_k)) src_k = src_dims[axis]; - SizeVector in_dims = getParentEdgeAt(TOPK_DATA)->getDims().ToSizeVector(); + SizeVector in_dims = getParentEdgeAt(TOPK_DATA)->getShape().getStaticDims(); if (src_k == 1) { if (is_last_dim) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp index 49bc1bb695d..5ea5b902e3e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp @@ -10,7 +10,7 @@ #include #include "ie_parallel.hpp" #include "utils/bfloat16.hpp" - +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -66,7 +66,7 @@ void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() { auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec); auto inputOrderDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1)); - InferenceEngine::LayerConfig config; + NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(2); config.outConfs.resize(1); @@ -74,53 +74,66 @@ void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].constant = false; config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), inputOrderDataType, memory::format_tag::x); - if (getParentEdgeAt(0)->getDims().ndims() == 4) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nchw}); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), inputOrderDataType, + memory::format_tag::x); + if (getParentEdgeAt(0)->getShape().getRank() == 4) { + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nchw); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + memory::format_tag::nchw); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); - auto srcDims = getParentEdgeAt(0)->getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); if (srcDims[1] % 8 == 0) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw8c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw8c}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nChw8c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (srcDims[1] % 16 == 0) { - config.inConfs[0].desc = 
MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw16c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw16c}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nChw16c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (prec == Precision::FP32 || prec == Precision::I8 || prec == Precision::U8) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nhwc); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nhwc); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nhwc}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nhwc); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + memory::format_tag::nhwc); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } - } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::ncdhw}); + } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::ncdhw); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + memory::format_tag::ncdhw); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); - auto srcDims = getParentEdgeAt(0)->getDims(); + auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); if (srcDims[1] % 8 == 0) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw8c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw8c}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nCdhw8c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (srcDims[1] % 16 == 0) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw16c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw16c}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::nCdhw16c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (prec == Precision::FP32 || prec == Precision::I8 || prec == Precision::U8) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ndhwc); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ndhwc); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, 
memory::format_tag::ndhwc}); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + memory::format_tag::ndhwc); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + memory::format_tag::ndhwc); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } } else { // general plain case - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } } @@ -135,23 +148,22 @@ void MKLDNNTransposeNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - if (getParentEdgeAt(0)->getMemory().GetDesc().isPlainFormat() && + if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && std::find(optimizedOrders.begin(), optimizedOrders.end(), order) != optimizedOrders.end()) { isOptimized = true; return; } PermuteParams params; - params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getPrecision().size(); + params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); params.order = order; + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + params.src_block_dims = srcDesc.getBlockDims(); + params.src_block_order = srcDesc.getOrder(); - auto srcDesc = getParentEdgeAt(0)->getDesc(); - params.src_block_dims = srcDesc.getBlockingDesc().getBlockDims(); - params.src_block_order = srcDesc.getBlockingDesc().getOrder(); - - auto dstDesc = getChildEdgeAt(0)->getDesc(); - params.dst_block_dims = dstDesc.getBlockingDesc().getBlockDims(); - params.dst_block_order = dstDesc.getBlockingDesc().getOrder(); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + params.dst_block_dims = dstDesc.getBlockDims(); + params.dst_block_order = dstDesc.getOrder(); permuteKernel = std::unique_ptr(new PermuteKernel(params)); } @@ -263,7 +275,7 @@ void MKLDNNTransposeNode::execute(mkldnn::stream strm) { int MB = batchToProcess(); if (isOptimized) { - const size_t dataSize = getParentEdgeAt(0)->getDesc().getPrecision().size(); + const size_t dataSize = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size(); TransposeContext ctx = {this, srcMemPtr, dstMemPtr, MB}; OV_SWITCH(MKLDNNPlugin, TransposeOptimizedEmitter, ctx, dataSize, OV_CASE(1, PrecisionTrait::value_type), diff --git a/inference-engine/src/mkldnn_plugin/normalize_preprocess.cpp b/inference-engine/src/mkldnn_plugin/normalize_preprocess.cpp index 7007c6ad00a..3aa58888b58 100644 --- a/inference-engine/src/mkldnn_plugin/normalize_preprocess.cpp +++ b/inference-engine/src/mkldnn_plugin/normalize_preprocess.cpp @@ -5,6 +5,7 @@ #include "normalize_preprocess.h" #include "ie_parallel.hpp" #include "nodes/common/cpu_memcpy.h" +#include "utils/general_utils.h" using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -12,7 +13,7 @@ using namespace InferenceEngine; NormalizePreprocess::NormalizePreprocess() : meanBuffer(nullptr) { } -void NormalizePreprocess::Load(const MKLDNNDims& inputDims, 
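PermuteParams above is now filled from the plugin's blocked memory descriptors (getBlockDims/getOrder) instead of the InferenceEngine blocking desc. As a point of reference, a hypothetical 1x16x8x8 FP32 tensor stored as nChw8c would be described roughly as follows; the exact values are an assumption based on the usual blocked-layout convention:

    #include <cstddef>
    #include <vector>

    struct BlockedLayoutExample {
        // dims {1, 16, 8, 8} re-expressed with an inner channel block of 8
        std::vector<std::size_t> blockDims{1, 2, 8, 8, 8};  // N, C/8, H, W, 8
        std::vector<std::size_t> order{0, 1, 2, 3, 1};      // trailing 1: the inner block splits dim C
    };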
InputInfo::Ptr inputInfo) { +void NormalizePreprocess::Load(const Shape& inputShape, InputInfo::Ptr inputInfo) { PreProcessInfo &pp = inputInfo->getPreProcess(); size_t inChannels = pp.getNumberOfChannels(); if (inChannels == 0) { @@ -20,7 +21,7 @@ void NormalizePreprocess::Load(const MKLDNNDims& inputDims, InputInfo::Ptr input return; } - if (inChannels != inputDims[1]) { + if (!dimsEqualStrong(inChannels, inputShape.getDims()[1])) { IE_THROW() << "channels mismatch between mean and input"; } @@ -76,10 +77,11 @@ void NormalizePreprocess::Load(const MKLDNNDims& inputDims, InputInfo::Ptr input } } -void NormalizePreprocess::NormalizeImage(const MKLDNNDims &inputDims, float *input, InferenceEngine::Layout layout) { +void NormalizePreprocess::NormalizeImage(const Shape &inputShape, float *input, InferenceEngine::Layout layout) { IE_ASSERT(input != nullptr); - if (inputDims.ndims() != 4) { + const auto inputDims = inputShape.getStaticDims(); + if (inputDims.size() != 4) { IE_THROW() << "Expecting input as 4 dimension blob with format NxCxHxW."; } @@ -88,7 +90,7 @@ void NormalizePreprocess::NormalizeImage(const MKLDNNDims &inputDims, float *inp } int MB = inputDims[0]; - int srcSize = inputDims.size() / MB; + int srcSize = inputShape.getElementsCount() / MB; if (meanBuffer && meanBuffer->size()) { const float * meanBufferValues = meanBuffer->readOnly(); diff --git a/inference-engine/src/mkldnn_plugin/normalize_preprocess.h b/inference-engine/src/mkldnn_plugin/normalize_preprocess.h index 1bc6d843195..72ba9fd27a8 100644 --- a/inference-engine/src/mkldnn_plugin/normalize_preprocess.h +++ b/inference-engine/src/mkldnn_plugin/normalize_preprocess.h @@ -6,7 +6,7 @@ #include "ie_input_info.hpp" -#include "mkldnn_dims.h" +#include "cpu_shape.h" #include "ie_parallel.hpp" #include #include @@ -18,14 +18,15 @@ public: NormalizePreprocess(); public: - void Load(const MKLDNNDims& inputDims, InferenceEngine::InputInfo::Ptr inputInfo); - void NormalizeImage(const MKLDNNDims &inputDims, float *input, InferenceEngine::Layout layout); + void Load(const Shape& inputShape, InferenceEngine::InputInfo::Ptr inputInfo); + void NormalizeImage(const Shape &inputShape, float *input, InferenceEngine::Layout layout); template::value>::type* = nullptr> - void NormalizeImage(const MKLDNNDims &inputDims, T *input, InferenceEngine::Layout layout) { + void NormalizeImage(const Shape &inputShape, T *input, InferenceEngine::Layout layout) { IE_ASSERT(input != nullptr); - if (inputDims.ndims() != 4) { + const auto inputDims = inputShape.getStaticDims(); + if (inputDims.size() != 4) { IE_THROW() << "Expecting input as 4 dimension blob with format NxCxHxW."; } @@ -34,7 +35,7 @@ public: } int MB = inputDims[0]; - int srcSize = inputDims.size() / MB; + int srcSize = inputShape.getElementsCount() / MB; if (meanBuffer && meanBuffer->size()) { const float * meanBufferValues = meanBuffer->readOnly(); diff --git a/inference-engine/src/mkldnn_plugin/perf_count.h b/inference-engine/src/mkldnn_plugin/perf_count.h index 3fce79b5e68..0f230c4c76f 100644 --- a/inference-engine/src/mkldnn_plugin/perf_count.h +++ b/inference-engine/src/mkldnn_plugin/perf_count.h @@ -46,4 +46,5 @@ public: } // namespace MKLDNNPlugin -#define PERF(_counter) PerfHelper __helper##__counter (_counter->PerfCounter()); +#define GET_PERF(_counter) std::unique_ptr(new PerfHelper(_counter->PerfCounter())) +#define PERF(_need, _counter) auto pc = _need ? 
GET_PERF(_counter) : nullptr; diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp index 17b13034f7f..1272183c68b 100644 --- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp +++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp @@ -5,10 +5,13 @@ #include "blob_dump.h" #include "blob_factory.hpp" #include "mkldnn_memory.h" +#include "mkldnn_extension_utils.h" +#include #include "common/memory_desc_wrapper.hpp" #include +#include using namespace InferenceEngine; @@ -35,7 +38,7 @@ struct IEB_HEADER { unsigned long scaling_data_size; }; -static IEB_HEADER prepare_header(const TensorDesc& desc) { +static IEB_HEADER prepare_header(const MemoryDesc& desc) { IEB_HEADER header = {}; header.magic[0] = IEB_MAGIC[0]; @@ -49,19 +52,20 @@ static IEB_HEADER prepare_header(const TensorDesc& desc) { header.precision = desc.getPrecision(); - if (desc.getDims().size() > 7) + if (desc.getShape().getRank() > 7) IE_THROW() << "Dumper support max 7D blobs"; - header.ndims = desc.getDims().size(); + header.ndims = desc.getShape().getRank(); + const auto &dims = desc.getShape().getStaticDims(); for (int i = 0; i < header.ndims; i++) - header.dims[i] = desc.getDims()[i]; + header.dims[i] = dims[i]; header.scaling_axis = NO_SCALES; return header; } -static TensorDesc parse_header(IEB_HEADER &header) { +static MKLDNNMemoryDesc parse_header(IEB_HEADER &header) { if (header.magic[0] != IEB_MAGIC[0] || header.magic[1] != IEB_MAGIC[1] || header.magic[2] != IEB_MAGIC[2] || @@ -72,175 +76,126 @@ static TensorDesc parse_header(IEB_HEADER &header) { header.ver[1] != 1) IE_THROW() << "Dumper cannot parse file. Unsupported IEB format version."; - Precision prc = Precision(static_cast(header.precision)); + const auto prc = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision(static_cast(header.precision))); SizeVector dims(header.ndims); for (int i = 0; i < header.ndims; i++) dims[i] = header.dims[i]; - return TensorDesc {prc, dims, TensorDesc::getLayoutByDims(dims) }; + return MKLDNNMemoryDesc{dims, prc, MKLDNNMemory::GetPlainFormatByRank(dims.size()) }; } +void BlobDumper::prepare_plain_data(const MKLDNNMemoryPtr &memory, std::vector &data) const { + const auto &desc = memory->GetDesc(); + size_t data_size = desc.getShape().getElementsCount(); + const auto size = data_size * desc.getPrecision().size(); + data.resize(size); -bool is_plain(const Blob::Ptr &blob) { - bool res = true; - - auto orig_strides = blob->getTensorDesc().getBlockingDesc().getStrides(); - auto orig_order = blob->getTensorDesc().getBlockingDesc().getOrder(); - auto dims = blob->getTensorDesc().getDims(); - - for (int stride = 1, i = dims.size() - 1; i >= 0; --i) { - if (stride != orig_strides[i] || i != orig_order[i]) res = false; - stride *= dims[i]; + // check if it already plain + if (desc.hasLayoutType(LayoutType::ncsp)) { + cpu_memcpy(data.data(), reinterpret_cast(memory->GetPtr()), size); + return; } - return res; -} - -static Blob::Ptr prepare_plain_data(Blob::Ptr blob) { - // check if it already plain - if (is_plain(blob)) return blob; - - Blob::Ptr pln_blob = make_plain_blob(blob->getTensorDesc().getPrecision(), blob->getTensorDesc().getDims()); - pln_blob->allocate(); - // Copy to plain - MKLDNNMemoryDesc mdesc(blob->getTensorDesc()); - mkldnn::memory::desc desc = mdesc; - mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data); + const void *ptr = memory->GetData(); - size_t data_size = blob->size(); - - // TODO: make it with blob_copy utility - switch 
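The reworked PERF macro above takes an explicit enable flag and only heap-allocates the RAII helper when counting is requested, so measurement can be switched off at runtime with no scope guard at all. A minimal self-contained mock of that pattern (PerfHelperMock and the *_MOCK names are illustrative, not the plugin's API):

    #include <memory>

    struct PerfHelperMock {
        PerfHelperMock()  { /* start timer           */ }
        ~PerfHelperMock() { /* stop and store result */ }
    };

    #define GET_PERF_MOCK() std::unique_ptr<PerfHelperMock>(new PerfHelperMock())
    #define PERF_MOCK(_need) auto pc = (_need) ? GET_PERF_MOCK() : nullptr

    void execute(bool countersEnabled) {
        PERF_MOCK(countersEnabled);  // the helper, if created, lives until the end of this scope
        // ... node execution ...
    }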
(blob->getTensorDesc().getPrecision()) { + switch (desc.getPrecision()) { case Precision::FP32: case Precision::I32: { - auto *pln_blob_ptr = pln_blob->buffer().as(); - auto *blob_ptr = blob->buffer().as(); + auto *pln_blob_ptr = reinterpret_cast(data.data()); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)]; + pln_blob_ptr[i] = blob_ptr[desc.getElementOffset(i)]; break; } - case Precision::I16: - case Precision::U16: case Precision::BF16: { - auto *pln_blob_ptr = pln_blob->buffer().as(); - auto *blob_ptr = blob->buffer().as(); - for (size_t i = 0; i < data_size; i++) pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)]; + auto *pln_blob_ptr = reinterpret_cast(data.data()); + auto *blob_ptr = reinterpret_cast(ptr); + for (size_t i = 0; i < data_size; i++) + pln_blob_ptr[i] = blob_ptr[desc.getElementOffset(i)]; break; } case Precision::I8: case Precision::U8: { - auto *pln_blob_ptr = pln_blob->buffer().as(); - auto *blob_ptr = blob->buffer().as(); + auto *pln_blob_ptr = reinterpret_cast(data.data()); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - pln_blob_ptr[i] = blob_ptr[blob_wrp.off_l(i)]; + pln_blob_ptr[i] = blob_ptr[desc.getElementOffset(i)]; break; } default: IE_THROW() << "Dumper. Unsupported precision"; } - - return pln_blob; } void BlobDumper::dump(std::ostream &stream) const { - if (!_blob) - IE_THROW() << "Dumper cannot dump empty Blob"; + if (memory == nullptr) + IE_THROW() << "Dumper cannot dump. Memory is not allocated."; - if (_blob->buffer().as() == nullptr) - IE_THROW() << "Dumper cannot dump. Blob is not allocated."; - - IEB_HEADER header = prepare_header(_blob->getTensorDesc()); - Blob::Ptr pln_blob = prepare_plain_data(_blob); + IEB_HEADER header = prepare_header(memory->GetDesc()); + std::vector data; + prepare_plain_data(this->memory, data); header.data_offset = sizeof(header); - header.data_size = pln_blob->byteSize(); + header.data_size = data.size(); header.scaling_data_offset = 0; header.scaling_data_size = 0; - if (_scales) { - header.scaling_axis = 1; - header.scaling_data_offset = header.data_offset + header.data_size; - header.scaling_data_size = _scales->byteSize(); - } - - stream.write(reinterpret_cast(&header), sizeof(header)); - stream.write(pln_blob->buffer().as(), pln_blob->byteSize()); - - if (_scales) { - stream.write(_scales->buffer().as(), _scales->byteSize()); - } + stream.write(reinterpret_cast(&header), sizeof(header)); + stream.write(reinterpret_cast(data.data()), data.size()); } void BlobDumper::dumpAsTxt(std::ostream &stream) const { - if (!_blob) - IE_THROW() << "Dumper cannot dump empty Blob"; + if (memory == nullptr) + IE_THROW() << "Dumper cannot dump. Memory is not allocated."; - if (_blob->buffer().as() == nullptr) - IE_THROW() << "Dumper cannot dump. 
Blob is not allocated."; - - SizeVector dims = _blob->getTensorDesc().getDims(); + const auto dims = memory->GetDims(); + const auto &desc = memory->GetDesc(); + size_t data_size = desc.getShape().getElementsCount(); // Header like "U8 4D shape: 2 3 224 224 () - stream << _blob->getTensorDesc().getPrecision().name() << " " + stream << memory->GetDesc().getPrecision().name() << " " << dims.size() << "D " << "shape: "; for (size_t d : dims) stream << d << " "; - stream << "(" << _blob->size() << ")" << - " by address 0x" << std::hex << _blob->buffer().as() << std::dec <(memory->GetData()) << std::dec <getTensorDesc()); - mkldnn::memory::desc desc = mdesc; - mkldnn::impl::memory_desc_wrapper blob_wrp(desc.data); + const void *ptr = memory->GetData(); - size_t data_size = _blob->size(); - switch (_blob->getTensorDesc().getPrecision()) { - case Precision::FP32: { - auto *blob_ptr = _blob->buffer().as(); + switch (desc.getPrecision()) { + case Precision::FP32 : { + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - stream << blob_ptr[blob_wrp.off_l(i)] << std::endl; + stream << blob_ptr[desc.getElementOffset(i)] << std::endl; break; } - case Precision::BF16: - { - auto *blob_ptr = _blob->buffer().as(); + case Precision::BF16: { + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) { - int i16n = blob_ptr[blob_wrp.off_l(i)]; + int i16n = blob_ptr[desc.getElementOffset(i)]; i16n = i16n << 16; - float fn = *(reinterpret_cast(&i16n)); + float fn = *(reinterpret_cast(&i16n)); stream << fn << std::endl; } break; } case Precision::I32: { - auto *blob_ptr = _blob->buffer().as(); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - stream << blob_ptr[blob_wrp.off_l(i)] << std::endl; - break; - } - case Precision::I16: { - auto *blob_ptr = _blob->buffer().as(); - for (size_t i = 0; i < data_size; i++) - stream << static_cast(blob_ptr[blob_wrp.off_l(i)]) << std::endl; - break; - } - case Precision::U16: { - auto *blob_ptr = _blob->buffer().as(); - for (size_t i = 0; i < data_size; i++) - stream << static_cast(blob_ptr[blob_wrp.off_l(i)]) << std::endl; + stream << blob_ptr[desc.getElementOffset(i)] << std::endl; break; } case Precision::I8: { - auto *blob_ptr = _blob->buffer().as(); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - stream << static_cast(blob_ptr[blob_wrp.off_l(i)]) << std::endl; + stream << static_cast(blob_ptr[desc.getElementOffset(i)]) << std::endl; break; } case Precision::U8: { - auto *blob_ptr = _blob->buffer().as(); + auto *blob_ptr = reinterpret_cast(ptr); for (size_t i = 0; i < data_size; i++) - stream << static_cast(blob_ptr[blob_wrp.off_l(i)]) << std::endl; + stream << static_cast(blob_ptr[desc.getElementOffset(i)]) << std::endl; break; } default: @@ -252,29 +207,12 @@ BlobDumper BlobDumper::read(std::istream &stream) { IEB_HEADER header; stream.read(reinterpret_cast(&header), sizeof(header)); - TensorDesc desc = parse_header(header); - Blob::Ptr blob = make_blob_with_precision(desc); - blob->allocate(); + const auto desc = parse_header(header); + BlobDumper res(desc); stream.seekg(header.data_offset, stream.beg); - stream.read(blob->buffer().as(), header.data_size); + stream.read(reinterpret_cast(res.getDataPtr()), header.data_size); - BlobDumper res(blob); - - // Parse scales fields. 
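dumpAsTxt above prints a one-line header before the per-element values. A runnable sketch of just that header formatting for a hypothetical U8 blob (the real code also appends the buffer address, omitted here):

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        const std::string precision = "U8";
        const std::vector<std::size_t> dims{2, 3, 224, 224};
        std::size_t count = 1;
        for (std::size_t d : dims) count *= d;

        std::cout << precision << " " << dims.size() << "D " << "shape: ";
        for (std::size_t d : dims) std::cout << d << " ";
        std::cout << "(" << count << ")" << std::endl;  // U8 4D shape: 2 3 224 224 (301056)
    }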
- if (header.scaling_axis != NO_SCALES) { - if (header.scaling_axis != 1) - IE_THROW() << "Dumper support scaling only for channel dims."; - - size_t scl_size = header.scaling_data_size / sizeof(float); - auto scl = make_blob_with_precision({Precision::FP32, {scl_size}, C}); - scl->allocate(); - - stream.seekg(header.scaling_data_offset, stream.beg); - stream.read(scl->buffer().as(), header.scaling_data_size); - - res._scales = scl; - } return res; } @@ -309,73 +247,4 @@ void BlobDumper::dumpAsTxt(const std::string& dump_path) const { dump_file.close(); } -Blob::Ptr BlobDumper::get() { - return _blob; -} - -template -static void plain_copy(const Blob::Ptr &from, const Blob::Ptr &scls, Blob::Ptr &to) { - auto dims = from->getTensorDesc().getDims(); - - size_t data_size = from->size(); - size_t outer_size = dims[0]; - size_t c_size = dims.size() > 1 ? dims[1] : 1; - size_t inner_size = dims.size() == 4 ? dims[2]*dims[3] : - dims.size() == 3 ? dims[2] : 1; - - auto to_data = to->buffer().as(); - auto from_data = from->buffer().as(); - - if (scls) { - auto scls_data = scls->buffer().as(); - - for (size_t o=0; o < outer_size; o++) - for (size_t c=0; c < c_size; c++) - for (size_t i=0; i < inner_size; i++) - *to_data++ = static_cast(*from_data++) * scls_data[c]; - } else { - for (size_t i=0; i < data_size; i++) - *to_data++ = static_cast(*from_data++); - } -} - -Blob::Ptr BlobDumper::getRealValue() { - if (_blob->getTensorDesc().getPrecision() == Precision::FP32 && !_scales) - return _blob; - - auto res = make_plain_blob(Precision::FP32, _blob->getTensorDesc().getDims()); - res->allocate(); - - switch (_blob->getTensorDesc().getPrecision()) { - case Precision::U8: plain_copy(_blob, _scales, res); break; - case Precision::FP32: plain_copy(_blob, _scales, res); break; - case Precision::I8: plain_copy(_blob, _scales, res); break; - default: IE_THROW() << "Unsupported precesion for getRealValue method."; - } - - return res; -} - - -BlobDumper& BlobDumper::withScales(InferenceEngine::Blob::Ptr scales) { - if ( _blob->getTensorDesc().getDims().size() < 2 || - scales->getTensorDesc().getDims().size() != 1 || - scales->getTensorDesc().getDims()[0] != _blob->getTensorDesc().getDims()[1] || - scales->getTensorDesc().getPrecision() != Precision::FP32) - IE_THROW() << "Dumper cannot use passed scales. Blob has incompatible shape."; - - _scales = scales; - return *this; -} - -BlobDumper& BlobDumper::withoutScales() { - _scales.reset(); - return *this; -} - - -const InferenceEngine::Blob::Ptr& BlobDumper::getScales() const { - return _scales; -} - } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h index c2cc793e421..5271f351d6b 100644 --- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h +++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h @@ -4,7 +4,7 @@ #pragma once -#include "ie_blob.h" +#include "mkldnn_memory.h" #include @@ -19,15 +19,21 @@ namespace MKLDNNPlugin { * NB! Channel is a second dimension for all blob types. 
*/ class BlobDumper { - InferenceEngine::Blob::Ptr _blob; - InferenceEngine::Blob::Ptr _scales; + MKLDNNMemoryPtr memory; + + void prepare_plain_data(const MKLDNNMemoryPtr &memory, std::vector &data) const; public: BlobDumper() = default; + BlobDumper(const MKLDNNMemoryDesc &desc) { + mkldnn::engine eng(mkldnn::engine::kind::cpu, 0); + memory = std::make_shared(eng); + memory->Create(desc); + } BlobDumper(const BlobDumper&) = default; BlobDumper& operator = (BlobDumper&&) = default; - explicit BlobDumper(const InferenceEngine::Blob::Ptr blob):_blob(blob) {} + explicit BlobDumper(const MKLDNNMemoryPtr &_memory) : memory(_memory) {} static BlobDumper read(const std::string &file_path); static BlobDumper read(std::istream &stream); @@ -38,13 +44,9 @@ public: void dumpAsTxt(const std::string &file_path) const; void dumpAsTxt(std::ostream &stream) const; - BlobDumper& withScales(InferenceEngine::Blob::Ptr scales); - BlobDumper& withoutScales(); - - const InferenceEngine::Blob::Ptr& getScales() const; - - InferenceEngine::Blob::Ptr get(); - InferenceEngine::Blob::Ptr getRealValue(); + void *getDataPtr() const { + return memory->GetPtr(); + } }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp b/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp index 0754e346a6e..0cd3975c39a 100644 --- a/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp +++ b/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp @@ -90,5 +90,4 @@ inline InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine: } return precision; } - } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/general_utils.h b/inference-engine/src/mkldnn_plugin/utils/general_utils.h index 952bf43dbf5..35640212a55 100644 --- a/inference-engine/src/mkldnn_plugin/utils/general_utils.h +++ b/inference-engine/src/mkldnn_plugin/utils/general_utils.h @@ -6,6 +6,7 @@ #include #include +#include "cpu_shape.h" namespace MKLDNNPlugin { @@ -40,6 +41,11 @@ constexpr inline bool implication(bool cause, bool cond) { return !cause || !!cond; } +template +std::unique_ptr make_unique(Args&&... 
args) { + return std::unique_ptr(new T(std::forward(args)...)); +} + inline std::string getExceptionDescWithoutStatus(const InferenceEngine::Exception& ex) { std::string desc = ex.what(); IE_SUPPRESS_DEPRECATED_START @@ -70,4 +76,62 @@ std::string vec2str(const std::vector &vec) { return std::string("()"); } +/** + * @brief Compares that two dims are equal and defined + * @param lhs + * first dim + * @param rhs + * second dim + * @return result of comparison + */ +inline bool dimsEqualStrong(size_t lhs, size_t rhs) { + return (lhs == rhs && lhs != Shape::UNDEFINED_DIM && rhs != Shape::UNDEFINED_DIM); +} + +/** + * @brief Compares that two dims are equal or undefined + * @param lhs + * first dim + * @param rhs + * second dim + * @return result of comparison + */ +inline bool dimsEqualWeak(size_t lhs, size_t rhs) { + return (lhs == Shape::UNDEFINED_DIM || rhs == Shape::UNDEFINED_DIM || lhs == rhs); +} + +/** + * @brief Compares that two shapes are equal or undefined + * @param lhs + * first shape + * @param rhs + * second shape + * @param skipAxis + * marks shape axis which shouldn't be validated + * @return order + */ +inline bool dimsEqualWeak(const std::vector& lhs, const std::vector& rhs, size_t skipAxis = Shape::UNDEFINED_DIM) { + if (lhs.size() != rhs.size()) + return false; + + for (size_t i = 0; i < lhs.size(); i++) { + if (i != skipAxis && !dimsEqualWeak(lhs[i], rhs[i])) + return false; + } + + return true; +} + +inline InferenceEngine::Precision getMaxPrecision(std::vector precisions) { + if (!precisions.empty()) { + std::sort(precisions.begin(), precisions.end(), + [](const InferenceEngine::Precision &lhs, const InferenceEngine::Precision &rhs) { + return lhs.size() > rhs.size(); + }); + return precisions[0]; + } + + return InferenceEngine::Precision::UNSPECIFIED; +} + } // namespace MKLDNNPlugin \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp index 1cfbae1ab5f..2e0b06c0e4d 100644 --- a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp @@ -9,6 +9,7 @@ #include "ie_common.h" #include "utils/blob_dump.h" #include "utils/debug_capabilities.h" +#include "cpu_memory_desc_utils.h" #include #include @@ -65,14 +66,11 @@ void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const { auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name; std::cout << "Dump inputs: " << dump_file << std::endl; - TensorDesc desc = prEdge->getDesc(); + auto& desc = prEdge->getMemory().GetDesc(); if (desc.getPrecision() == Precision::BIN) continue; - BlobDumper dumper(prEdge->getBlob()); - if (pr->ext_scales) - dumper.withScales(pr->ext_scales); - + BlobDumper dumper(prEdge->getMemoryPtr()); dump(dumper, dump_file); } @@ -101,14 +99,11 @@ void NodeDumper::dumpOutputBlobs(const MKLDNNNodePtr& node) const { auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name; std::cout << "Dump outputs: " << dump_file << std::endl; - TensorDesc desc = childEdge->getDesc(); + auto& desc = childEdge->getMemory().GetDesc(); if (desc.getPrecision() == Precision::BIN) continue; - BlobDumper dumper(childEdge->getBlob()); - if (node->ext_scales) - dumper.withScales(node->ext_scales); - + BlobDumper dumper(childEdge->getMemoryPtr()); dump(dumper, dump_file); } } @@ -126,7 +121,9 @@ void NodeDumper::dumpInternalBlobs(const MKLDNNNodePtr& node) const { if (desc.getPrecision() == Precision::BIN) continue; - BlobDumper dumper(blb); + 
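The comparison helpers added above treat an undefined dimension as a wildcard in the weak check and as a mismatch in the strong one. A standalone illustration, assuming the Shape::UNDEFINED_DIM sentinel is the maximum size_t value:

    #include <cstddef>
    #include <limits>

    constexpr std::size_t UNDEFINED_DIM = std::numeric_limits<std::size_t>::max();

    bool dimsEqualStrong(std::size_t lhs, std::size_t rhs) {
        return lhs == rhs && lhs != UNDEFINED_DIM && rhs != UNDEFINED_DIM;
    }

    bool dimsEqualWeak(std::size_t lhs, std::size_t rhs) {
        return lhs == UNDEFINED_DIM || rhs == UNDEFINED_DIM || lhs == rhs;
    }

    int main() {
        // An unknown channel count weakly matches 3 but never strongly matches it.
        return (dimsEqualWeak(UNDEFINED_DIM, 3) && !dimsEqualStrong(UNDEFINED_DIM, 3)) ? 0 : 1;
    }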
MKLDNNMemoryPtr memory = std::make_shared(node->getEngine()); + memory->Create(MemoryDescUtils::convertToMKLDNNMemoryDesc(desc), blb->buffer()); + BlobDumper dumper(memory); dump(dumper, dump_file); } } diff --git a/inference-engine/src/offline_transformations/include/disable_shapeof_constant_folding.hpp b/inference-engine/src/offline_transformations/include/disable_shapeof_constant_folding.hpp new file mode 100644 index 00000000000..678b41af0ef --- /dev/null +++ b/inference-engine/src/offline_transformations/include/disable_shapeof_constant_folding.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include + +namespace ngraph { +namespace pass { + +class DisableShapeOfConstantFolding; + +} // namespace pass +} // namespace ngraph + + +class ngraph::pass::DisableShapeOfConstantFolding: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + DisableShapeOfConstantFolding(); +}; diff --git a/inference-engine/src/offline_transformations/src/disable_shapeof_constant_folding.cpp b/inference-engine/src/offline_transformations/src/disable_shapeof_constant_folding.cpp new file mode 100644 index 00000000000..456ba721647 --- /dev/null +++ b/inference-engine/src/offline_transformations/src/disable_shapeof_constant_folding.cpp @@ -0,0 +1,32 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "disable_shapeof_constant_folding.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::DisableShapeOfConstantFolding, "DisableShapeOfConstantFolding", 0); + +ngraph::pass::DisableShapeOfConstantFolding::DisableShapeOfConstantFolding() { + auto shape_of = pattern::wrap_type([=](const Output & output) { + const auto & shape = output.get_partial_shape(); + return shape.is_dynamic() || shape_size(shape.get_shape()) != 1; + }); + + ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { + disable_constant_folding(m.get_match_root()); + return true; + }; + + auto m = std::make_shared(shape_of, "DisableShapeOfConstantFolding"); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/offline_transformations/src/moc_transformations.cpp b/inference-engine/src/offline_transformations/src/moc_transformations.cpp index 0b7d66f3743..1a23f72e607 100644 --- a/inference-engine/src/offline_transformations/src/moc_transformations.cpp +++ b/inference-engine/src/offline_transformations/src/moc_transformations.cpp @@ -5,8 +5,10 @@ #include #include "moc_transformations.hpp" +#include "disable_shapeof_constant_folding.hpp" #include +#include #include #include #include @@ -18,6 +20,21 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0); @@ -34,20 +51,56 @@ bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr(); + manager.register_pass( + element::TypeVector{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 }); + manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); + auto transpose_sinking = manager.register_pass(); + transpose_sinking->add_matcher(); + // SplitSqueezeConcatFusion should work in same GraphRewrite as TransposesSinking, + // because it 
replaces pattern that may contain Transposes which must be optimized before + // the transformation and it also inserts Transpose that can be optimized by TransposeSinking + transpose_sinking->add_matcher(); + + auto eliminations = manager.register_pass(); + eliminations->add_matcher(); + eliminations->add_matcher(false /* do not use shape for elimination */); + eliminations->set_name("ngraph::pass::CommonEliminations"); + auto common_fusions = manager.register_pass(); + common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->add_matcher(); + common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->set_name("ngraph::pass::CommonFusions"); + manager.register_pass(); + manager.register_pass(); + + auto decomp = manager.register_pass(); + decomp->add_matcher(); + + manager.register_pass(); + + auto conv_fusions = manager.register_pass(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->set_name("ngraph::pass::ConvFusions"); + manager.run_passes(f); // Restore original shapes to the nGraph Function diff --git a/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp b/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp index 271b200f31b..e944ffff57b 100644 --- a/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp +++ b/inference-engine/src/offline_transformations/src/pruning/propagate_masks.cpp @@ -19,12 +19,12 @@ namespace mask_propagation { class Convolution; class GroupConvolution; +class GroupConvolutionReshape; class Elementwise; class PassThrough; class StopPropagation; class FakeQuantize; class Concat; -class Reshape; } // namespace mask_propagation } // namespace pass @@ -192,9 +192,9 @@ public: } }; -class ngraph::pass::mask_propagation::Reshape : public MatcherPass { +class ngraph::pass::mask_propagation::GroupConvolutionReshape : public MatcherPass { public: - Reshape() { + GroupConvolutionReshape() { auto input = pattern::any_input(pattern::has_static_shape()); auto shape = pattern::any_input(); // Working only for Reshapes on Group Convolution weights @@ -258,10 +258,12 @@ public: ngraph::replace_node(old_shape_const, new_const); setMask(m_output, output_mask); - return true; + // This transformation propagates only Reshape mask and doesn't do anything with GroupConvolution. + // So, not to disable GroupConvolution mask propagation we return false here. 
+ return false; }; - auto m = std::make_shared(reshape, "ReshapeMaskPropagation"); + auto m = std::make_shared(gconv, "ReshapeMaskPropagation"); register_matcher(m, callback); } }; @@ -419,13 +421,12 @@ public: auto fq_node = std::dynamic_pointer_cast(m_output.get_node_shared_ptr()); size_t idx = 0; if (fq_node->get_auto_broadcast() != ngraph::op::AutoBroadcastType::NONE) { - for (auto const_node : fq_params_nodes) { + for (auto node : fq_params_nodes) { + auto const_node = std::dynamic_pointer_cast(node); + if (!const_node) throw ngraph_error("Unexpected operation type."); auto new_shape = broadcast_shape_to_rank(const_node->get_shape(), m_input.get_partial_shape().rank().get_length()); - auto const_copy = const_node->clone_with_new_inputs(const_node->input_values()); - auto new_const = std::dynamic_pointer_cast(const_copy); - new_const->set_data_shape(new_shape); - new_const->validate_and_infer_types(); + auto new_const = std::make_shared(*const_node, new_shape); new_const->set_friendly_name(const_node->get_friendly_name()); ngraph::copy_runtime_info(const_node, new_const); ngraph::replace_node(const_node, new_const); @@ -605,11 +606,11 @@ public: ngraph::pass::PropagateMasks::PropagateMasks() { add_matcher(); + add_matcher(); add_matcher(); add_matcher(); add_matcher(); add_matcher(); add_matcher(); - add_matcher(); add_matcher(); } diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp index ffefeed06f0..3af55071aa9 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp +++ b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp @@ -349,7 +349,7 @@ template - const int operator()(type_to_type) { return cv_type_to_depth::depth; } + int operator()(type_to_type) { return cv_type_to_depth::depth; } }; } // namespace diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp index cc00ec77114..5faf7bc37c4 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.cpp @@ -716,9 +716,9 @@ V10Parser::V10Parser::GenericLayerParams XmlDeserializer::parseGenericParams( int64_t dim = 0; const pugi::char_t* dimVal = node.child_value(); std::stringstream ss(dimVal); - if (!(ss >> dim) || dim < 0) { + if (!(ss >> dim) || dim < -1) { IE_THROW() << "dimension (" << dimVal << ") in node " << node.name() - << " must be a non-negative integer: at offset " + << " must be greater or equal to -1: at offset " << node.offset_debug(); } port.dims.push_back(dim); @@ -855,7 +855,7 @@ std::shared_ptr XmlDeserializer::createNode( size_t index{0}; for (const auto & output_params : params.outputPorts) { - ngraphNode->set_output_type(index, output_params.precision, ngraph::Shape(output_params.dims)); + ngraphNode->set_output_type(index, output_params.precision, ngraph::PartialShape(output_params.dims)); ++index; } } diff --git a/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp b/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp index 540f8454887..15ac63f531e 100644 --- a/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp +++ b/inference-engine/src/readers/ir_reader/ie_ir_parser.hpp @@ -67,7 +67,7 @@ public: struct GenericLayerParams { struct LayerPortData { size_t portId; - SizeVector dims; + std::vector dims; ngraph::element::Type_t precision; std::unordered_set names; }; diff --git 
a/inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp b/inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp new file mode 100644 index 00000000000..3bed4a37e6a --- /dev/null +++ b/inference-engine/src/transformations/include/ngraph_ops/nms_static_shape_ie.hpp @@ -0,0 +1,114 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include + +#include "ngraph/op/op.hpp" + +namespace ngraph { +namespace op { +namespace internal { + +template +class NmsStaticShapeIE : public BaseNmsOp { +public: + NGRAPH_RTTI_DECLARATION; + + using Attributes = typename BaseNmsOp::Attributes; + + /// \brief Constructs a NmsStaticShapeIE operation + /// + /// \param boxes Node producing the box coordinates + /// \param scores Node producing the box scores + /// \param attrs Attributes of the operation + NmsStaticShapeIE(const Output& boxes, + const Output& scores, + const Attributes& attrs) : BaseNmsOp(boxes, scores, attrs) { + this->constructor_validate_and_infer_types(); + } + void validate_and_infer_types() override; + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + return std::make_shared(new_args.at(0), new_args.at(1), this->m_attrs); + } +}; + +template +void NmsStaticShapeIE::validate_and_infer_types() { + const auto boxes_ps = this->get_input_partial_shape(0); + const auto scores_ps = this->get_input_partial_shape(1); + + auto first_dim_shape = Dimension::dynamic(); + + if (boxes_ps.rank().is_static() && scores_ps.rank().is_static()) { + const auto num_boxes_boxes = boxes_ps[1]; + if (num_boxes_boxes.is_static() && scores_ps[0].is_static() && scores_ps[1].is_static()) { + const auto num_boxes = num_boxes_boxes.get_length(); + auto num_classes = scores_ps[1].get_length(); + if (this->m_attrs.background_class >=0 && this->m_attrs.background_class <= num_classes) { + num_classes = num_classes - 1; + } + int64_t max_output_boxes_per_class = 0; + if (this->m_attrs.nms_top_k >= 0) + max_output_boxes_per_class = std::min(num_boxes, static_cast(this->m_attrs.nms_top_k)); + else + max_output_boxes_per_class = num_boxes; + + auto max_output_boxes_per_batch = max_output_boxes_per_class * num_classes; + if (this->m_keep_top_k >= 0) + max_output_boxes_per_batch = + std::min(max_output_boxes_per_batch, static_cast(this->m_attrs.keep_top_k)); + + first_dim_shape = max_output_boxes_per_batch * scores_ps[0].get_length(); + } + } + + // 'selected_outputs' have the following format: + // [number of selected boxes, [class_id, box_score, xmin, ymin, xmax, ymax]] + this->set_output_type(0, element::f32, {first_dim_shape, 6}); + // 'selected_indices' have the following format: + // [number of selected boxes, 1] + this->set_output_type(1, this->m_attrs.output_type, {first_dim_shape, 1}); + // 'selected_num' have the following format: + // [num_batches, ] + if (boxes_ps.rank().is_static() && boxes_ps.rank().get_length() > 0) { + this->set_output_type(2, this->m_attrs.output_type, {boxes_ps[0]}); + } else { + this->set_output_type(2, this->m_attrs.output_type, {Dimension::dynamic()}); + } +} + +template +const ::ngraph::Node::type_info_t& NmsStaticShapeIE::get_type_info() const { return get_type_info_static(); } + +template +const ::ngraph::Node::type_info_t& NmsStaticShapeIE::get_type_info_static() { + auto BaseNmsOpTypeInfoPtr = &BaseNmsOp::get_type_info_static(); + + // TODO: it should be static const std::string name = 
std::string("NmsStaticShapeIE_") + BaseNmsOpTypeInfoPtr->name; + // but currently it will not pass conversion ot Legacy Opset correctly + static const std::string name = BaseNmsOpTypeInfoPtr->name; + + static const ::ngraph::Node::type_info_t type_info_static{ + name.c_str(), BaseNmsOpTypeInfoPtr->version, BaseNmsOpTypeInfoPtr}; + return type_info_static; +} + +template +const ::ngraph::Node::type_info_t NmsStaticShapeIE::type_info = NmsStaticShapeIE::get_type_info_static(); + +#ifdef __clang__ +extern template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; +extern template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; +#endif // __clang__ + +} // namespace internal +} // namespace op +} // namespace ngraph diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/algebraic_simplification.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/algebraic_simplification.hpp index c363f513d9e..02f7860e203 100644 --- a/inference-engine/src/transformations/include/transformations/common_optimizations/algebraic_simplification.hpp +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/algebraic_simplification.hpp @@ -20,5 +20,5 @@ class TRANSFORMATIONS_API AlgebraicSimplification; class ngraph::pass::AlgebraicSimplification : public GraphRewrite { public: NGRAPH_RTTI_DECLARATION; - AlgebraicSimplification(); + AlgebraicSimplification() = default; }; diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp new file mode 100644 index 00000000000..79e203485fa --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include + +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API LeakyReluFusion; + +} // namespace pass +} // namespace ngraph + +/** + * @ingroup ie_transformation_common_api + * @brief LeakyReluFusion transformation replaces following graph: + * Multiply->Maximum to LeakyRelu + */ + +class ngraph::pass::LeakyReluFusion: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + LeakyReluFusion(); +}; diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/nop_elimination.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/nop_elimination.hpp index a5d9f7cd19f..ca5028d5126 100644 --- a/inference-engine/src/transformations/include/transformations/common_optimizations/nop_elimination.hpp +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/nop_elimination.hpp @@ -15,13 +15,80 @@ namespace ngraph { namespace pass { +class TRANSFORMATIONS_API EliminatePad; +class TRANSFORMATIONS_API EliminateConvert; +class TRANSFORMATIONS_API EliminateConvertNonZero; +class TRANSFORMATIONS_API EliminateConcat; +class TRANSFORMATIONS_API EliminateSplit; +class TRANSFORMATIONS_API EliminateTranspose; class TRANSFORMATIONS_API NopElimination; } // namespace pass } // namespace ngraph +/** + * @ingroup ie_transformation_common_api + * @brief EliminatePad eliminates pad that does nothing + */ +class ngraph::pass::EliminatePad: public ngraph::pass::MatcherPass { 
+public: + NGRAPH_RTTI_DECLARATION; + EliminatePad(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief EliminateConvert eliminates convert that does nothing + */ +class ngraph::pass::EliminateConvert: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + EliminateConvert(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief EliminateConvertNonZero eliminates convert before NonZero + */ +class ngraph::pass::EliminateConvertNonZero: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + EliminateConvertNonZero(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief EliminateConcat eliminates concat that does nothing + */ +class ngraph::pass::EliminateConcat: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + EliminateConcat(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief EliminateSplit eliminates split that does nothing + */ +class ngraph::pass::EliminateSplit: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + EliminateSplit(); +}; + +/** + * @ingroup ie_transformation_common_api + * @brief EliminateTranspose eliminates transpose that does nothing + */ +class ngraph::pass::EliminateTranspose: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + EliminateTranspose(); +}; + + class ngraph::pass::NopElimination: public GraphRewrite { public: NGRAPH_RTTI_DECLARATION; - NopElimination(); + NopElimination(bool use_shape_for_elimination = true); }; diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/pad_fusion.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/pad_fusion.hpp index 37c903de952..7c11f1ee02a 100644 --- a/inference-engine/src/transformations/include/transformations/common_optimizations/pad_fusion.hpp +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/pad_fusion.hpp @@ -12,7 +12,6 @@ namespace ngraph { namespace pass { class TRANSFORMATIONS_API PadFusion; -class TRANSFORMATIONS_API PadElimination; class TRANSFORMATIONS_API PadFusionAvgPool; class TRANSFORMATIONS_API PadFusionMaxPool; class TRANSFORMATIONS_API PadFusionConvolution; @@ -23,16 +22,6 @@ class TRANSFORMATIONS_API PadFusionGroupConvolutionBackpropData; } // namespace pass } // namespace ngraph -/** - * @ingroup ie_transformation_common_api - * @brief PadElimination eliminates pad that does nothing - */ -class ngraph::pass::PadElimination: public ngraph::pass::MatcherPass { -public: - NGRAPH_RTTI_DECLARATION; - PadElimination(); -}; - /** * @ingroup ie_transformation_common_api * @brief PadFusion transformation replaces following graph: @@ -124,6 +113,5 @@ public: add_matcher(); add_matcher(); add_matcher(); - add_matcher(); } }; diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp index 752054baa49..2c7c7e5cd0d 100644 --- a/inference-engine/src/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp @@ -21,6 +21,7 @@ class TRANSFORMATIONS_API SimplifyShapeOfSubGraph; class TRANSFORMATIONS_API SharedShapeOf; class TRANSFORMATIONS_API GroupedGatherElimination; class TRANSFORMATIONS_API GatherNopElimination; +class 
TRANSFORMATIONS_API SimplifyGatherShapeOf; } // namespace pass } // namespace ngraph @@ -69,3 +70,15 @@ public: NGRAPH_RTTI_DECLARATION; GatherNopElimination(); }; + +/** + * @ingroup ie_transformation_common_api + * @brief SimplifyGatherShapeOf optimizes `gather->shapeof` into `shapeof->gather` for 0D indices. + * Other cases into Concat of shapeof/gather(data) + shapeof(indices) transformation optimizes out + * useless Gather operations + */ +class ngraph::pass::SimplifyGatherShapeOf: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + SimplifyGatherShapeOf(); +}; diff --git a/inference-engine/src/transformations/include/transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp b/inference-engine/src/transformations/include/transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp index 79ad6e3e882..f5405daa6d6 100644 --- a/inference-engine/src/transformations/include/transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp +++ b/inference-engine/src/transformations/include/transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp @@ -22,5 +22,5 @@ class ngraph::pass::DisableConvertConstantFoldingOnConstPath : public ngraph::pa public: NGRAPH_RTTI_DECLARATION; DisableConvertConstantFoldingOnConstPath( - const std::vector& inputPrecisions = {}); + const element::TypeVector & inputPrecisions = {}); }; diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp new file mode 100644 index 00000000000..080a0868322 --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API ConvertMatrixNmsToMatrixNmsIE; + +} // namespace pass +} // namespace ngraph + +class ngraph::pass::ConvertMatrixNmsToMatrixNmsIE: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMatrixNmsToMatrixNmsIE(); +}; diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp new file mode 100644 index 00000000000..b639364b24e --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API ConvertMulticlassNmsToMulticlassNmsIE; + +} // namespace pass +} // namespace ngraph + +class ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMulticlassNmsToMulticlassNmsIE(); +}; diff --git a/inference-engine/src/transformations/include/transformations/rt_info/disable_constant_folding.hpp b/inference-engine/src/transformations/include/transformations/rt_info/disable_constant_folding.hpp new file mode 100644 
index 00000000000..1e04ce22dcc --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/rt_info/disable_constant_folding.hpp @@ -0,0 +1,44 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace ngraph { + +/** + * @ingroup ie_runtime_attr_api + * @brief DisableConstantFolding disable ConstantFolding for given operation + */ +class TRANSFORMATIONS_API DisableConstantFolding { +public: + DisableConstantFolding() = default; +}; + +extern template class TRANSFORMATIONS_API VariantImpl; + +template<> +class TRANSFORMATIONS_API VariantWrapper : public VariantImpl { +public: + static constexpr VariantTypeInfo type_info{"DISABLED_CONSTANT_FOLDING", 0}; + + const VariantTypeInfo &get_type_info() const override { + return type_info; + } + + VariantWrapper(const value_type &value) : VariantImpl(value) {} + + bool is_copyable() const override { return false; } +}; + +TRANSFORMATIONS_API void disable_constant_folding(const std::shared_ptr& node); +} // namespace ngraph diff --git a/inference-engine/src/transformations/include/transformations/rt_info/strides_property.hpp b/inference-engine/src/transformations/include/transformations/rt_info/strides_property.hpp index a964c490fe8..2ec78ce6892 100644 --- a/inference-engine/src/transformations/include/transformations/rt_info/strides_property.hpp +++ b/inference-engine/src/transformations/include/transformations/rt_info/strides_property.hpp @@ -4,6 +4,8 @@ #pragma once +#include +#include #include #include diff --git a/inference-engine/src/transformations/src/ngraph_ops/nms_static_shape_ie.cpp b/inference-engine/src/transformations/src/ngraph_ops/nms_static_shape_ie.cpp new file mode 100644 index 00000000000..8f173eafcae --- /dev/null +++ b/inference-engine/src/transformations/src/ngraph_ops/nms_static_shape_ie.cpp @@ -0,0 +1,19 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "ngraph/ops.hpp" +#include "ngraph_ops/nms_static_shape_ie.hpp" + +namespace ngraph { +namespace op { +namespace internal { + +template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; +template class TRANSFORMATIONS_API op::internal::NmsStaticShapeIE; + +} // namespace internal +} // namespace op +} // namespace ngraph diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/algebraic_simplification.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/algebraic_simplification.cpp index 519c48aa05f..14ed78d4e97 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/algebraic_simplification.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/algebraic_simplification.cpp @@ -2,163 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include - #include "transformations/common_optimizations/algebraic_simplification.hpp" -#include "itt.hpp" - -#include -#include -#include -#include -#include - -using namespace std; -using namespace ngraph; NGRAPH_RTTI_DEFINITION(ngraph::pass::AlgebraicSimplification, "AlgebraicSimplification", 0); - -//`simplify_gather`, optimizes gather if Gather is gathering the -// whole input tensor -static bool simplify_gather(std::shared_ptr node) { - if (auto gather = as_type_ptr(node)) { - // check if we are gathering the whole input - auto data = gather->input_value(0); - auto indices = 
gather->input_value(1); - - // we need to know data and indices shape to infer if gather is Nop - if (data.get_partial_shape().is_dynamic() || indices.get_partial_shape().is_dynamic()) { - return false; - } - // if rank of data and gather output dont match, we will skip - if (data.get_shape().size() != node->get_shape().size()) { - return false; - } - - auto axis = gather->get_axis(); - if (axis == opset3::Gather::AXIS_NOT_SET_VALUE) { - NGRAPH_DEBUG << "axis value not set"; - return false; - } - - // case_1 : if the input tensor is of shape (4, 1, 4) - // and axis = 1, then the gather would be simply - // gathering the whole input tensor, so we can optimize this - // op has Nop - - if (data.get_shape()[axis] == 1 && data.get_shape() == node->get_shape()) { - return replace_output_update_name(gather->output(0), gather->input_value(0)); - } - - // case_2 : if the input tensor is of shape (4, 3, 4) - // we need to check the contents of indices, if indices - // is 1D tensor of value {0, 1, 2}, we can optimize this - // op has Nop - - // check if the indices is constant - auto constant_indices = - as_type_ptr(gather->input_value(1).get_node_shared_ptr()); - if (!constant_indices) { - return false; - } else { - // if ref_inidices == indices, we are capturing the - // entire input tensor - std::vector ref_indices(data.get_shape()[axis], 0); - std::iota(ref_indices.begin(), ref_indices.end(), 0); - if (ref_indices == constant_indices->cast_vector()) { - return replace_output_update_name(gather->output(0), gather->input_value(0)); - } - } - } - return false; -} - -// optimizes `gather->shapeof` into `shapeof->gather` for 0D indices -// other cases into Concat of shapeof/gather(data) + shapeof(indices) -static bool simplify_gather_shapeof(shared_ptr node) { - auto gather = as_type_ptr(node->input_value(0).get_node_shared_ptr()); - if (!gather) { - return false; - } - auto gather_in_rank = gather->get_input_partial_shape(0).rank(); - auto indices_rank = gather->get_input_partial_shape(1).rank(); - auto axis = gather->get_axis(); - if (gather_in_rank.is_dynamic() || indices_rank.is_dynamic() || - axis == opset3::Gather::AXIS_NOT_SET_VALUE) { - NGRAPH_DEBUG << gather << " cannot simplify gather->shapeof"; - return false; - } - - auto zero_axis = opset3::Constant::create(element::i64, Shape{}, {0}); - NodeVector new_ops; - auto new_shapeof = make_shared(gather->input_value(0), node->get_output_element_type(0)); - new_ops.push_back(new_shapeof); - std::shared_ptr replace_op; - if (indices_rank.get_length() == 0) { - std::vector vi(gather_in_rank.get_length()); - std::iota(vi.begin(), vi.end(), 0); - vi.erase(vi.begin() + axis); - auto new_indices = opset3::Constant::create(element::i64, Shape{vi.size()}, vi); - replace_op = make_shared(new_shapeof, new_indices, zero_axis); - new_ops.push_back(replace_op); - } else { - NodeVector concat_inputs; - if (axis > 0) { - std::vector vi(axis); - std::iota(vi.begin(), vi.end(), 0); - auto indices = opset3::Constant::create(element::i64, Shape{vi.size()}, vi); - auto gather = make_shared(new_shapeof, indices, zero_axis); - new_ops.push_back(gather); - concat_inputs.push_back(gather); - } - auto shapeof_indices = make_shared(gather->input_value(1), node->get_output_element_type(0)); - new_ops.push_back(shapeof_indices); - - concat_inputs.push_back(shapeof_indices); - - if (gather_in_rank.get_length() - 1 > axis) { - std::vector vi(gather_in_rank.get_length() - (axis + 1)); - std::iota(vi.begin(), vi.end(), axis + 1); - auto indices = 
opset3::Constant::create(element::i64, Shape{vi.size()}, vi); - auto gather = make_shared(new_shapeof, indices, zero_axis); - new_ops.push_back(gather); - concat_inputs.push_back(gather); - } - replace_op = make_shared(concat_inputs, 0); - new_ops.push_back(replace_op); - } - replace_op->set_friendly_name(node->get_friendly_name()); - copy_runtime_info(node, new_ops); - replace_node(node, replace_op); - return true; -} - -#define ECHO(NAME) #NAME -#define STR(NAME) ECHO(NAME) -#define SIMPLE_MATCHER_PASS_DEFINITION(NAME, OP, FUNC) \ -class NAME : public ngraph::pass::MatcherPass { \ -public: \ -NGRAPH_RTTI_DECLARATION; \ -NAME() { \ - MATCHER_SCOPE(NAME); \ - auto match_node = ngraph::pattern::wrap_type(); \ - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { \ - return FUNC(m.get_match_root()); \ - }; \ - auto m = std::make_shared(match_node, matcher_name); \ - register_matcher(m, callback); \ -} \ -}; \ -NGRAPH_RTTI_DEFINITION(NAME, STR(NAME), 0); - -SIMPLE_MATCHER_PASS_DEFINITION(EliminateGather, opset3::Gather, simplify_gather); -SIMPLE_MATCHER_PASS_DEFINITION(SimplifyShapeOf2Gather, opset2::ShapeOf, simplify_gather_shapeof); -SIMPLE_MATCHER_PASS_DEFINITION(SimplifyShapeOf3Gather, opset3::ShapeOf, simplify_gather_shapeof); - -ngraph::pass::AlgebraicSimplification::AlgebraicSimplification() { - add_matcher(); - add_matcher(); - add_matcher(); -} diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 415ecb11610..e0089d644da 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -6,7 +6,6 @@ #include "transformations/init_node_info.hpp" #include "itt.hpp" -#include "transformations/common_optimizations/algebraic_simplification.hpp" #include "transformations/common_optimizations/broadcast_elementwise_fusion.hpp" #include "transformations/common_optimizations/nop_elimination.hpp" #include "transformations/common_optimizations/common_optimizations.hpp" @@ -21,6 +20,7 @@ #include "transformations/common_optimizations/swish_fusion.hpp" #include "transformations/common_optimizations/normalize_l2_fusion.hpp" #include "transformations/common_optimizations/pull_transpose_through_fq.hpp" +#include "transformations/common_optimizations/leaky_relu_fusion.hpp" #include "transformations/common_optimizations/lin_op_sequence_fusion.hpp" #include "transformations/common_optimizations/remove_filtering_boxes_by_size.hpp" #include "transformations/common_optimizations/hsigmoid_fusion.hpp" @@ -108,7 +108,6 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr(); eliminations->add_matcher(); - eliminations->add_matcher(); // may introduce fake dynamism eliminations->add_matcher(); // may introduce fake dynamism eliminations->set_name("ngraph::pass::CommonEliminations"); @@ -133,6 +132,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptradd_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->set_name("ngraph::pass::CommonFusions"); manager.register_pass(); diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/leaky_relu_fusion.cpp 
b/inference-engine/src/transformations/src/transformations/common_optimizations/leaky_relu_fusion.cpp new file mode 100644 index 00000000000..388d2f17104 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/leaky_relu_fusion.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/leaky_relu_fusion.hpp" +#include "transformations/utils/utils.hpp" + +#include +#include + +#include +#include +#include +#include "itt.hpp" + + +NGRAPH_RTTI_DEFINITION(ngraph::pass::LeakyReluFusion, "LeakyReluFusion", 0); + +ngraph::pass::LeakyReluFusion::LeakyReluFusion() { + MATCHER_SCOPE(LeakyReluFusion); + auto data_pattern = ngraph::pattern::any_input(); + auto alpha_pattern = ngraph::pattern::any_input(pattern::has_static_shape()); + auto multiply_pattern = ngraph::pattern::wrap_type({data_pattern, alpha_pattern}, pattern::consumers_count(1)); + auto max_pattern = ngraph::pattern::wrap_type({data_pattern, multiply_pattern}); + + ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto pattern_map = m.get_pattern_value_map(); + auto data = pattern_map.at(data_pattern); + const auto & original_alpha_pattern = pattern_map.at(alpha_pattern); + + if (shape_size(original_alpha_pattern.get_shape()) != 1) + return false; + + auto leaky_relu = register_new_node(data, original_alpha_pattern); + auto maximum = pattern_map.at(max_pattern); + leaky_relu->set_friendly_name(maximum.get_node()->get_friendly_name()); + + copy_runtime_info({ + pattern_map.at(multiply_pattern).get_node_shared_ptr(), + maximum.get_node_shared_ptr() + }, + leaky_relu); + replace_node(maximum.get_node_shared_ptr(), leaky_relu); + + return true; + }; + + auto m = std::make_shared(max_pattern, matcher_name); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/nop_elimination.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/nop_elimination.cpp index db1ea01fd3d..32f139a15ed 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/nop_elimination.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/nop_elimination.cpp @@ -5,11 +5,10 @@ #include "itt.hpp" #include #include -#include -#include -#include +#include #include +#include #include #include #include @@ -18,9 +17,60 @@ using namespace std; using namespace ngraph; -#define TI(x) x::type_info +//`simplify_gather`, optimizes gather if Gather is gathering the +// whole input tensor +static bool simplify_gather(std::shared_ptr node) { + if (auto gather = as_type_ptr(node)) { + // check if we are gathering the whole input + auto data = gather->input_value(0); + auto indices = gather->input_value(1); -NGRAPH_RTTI_DEFINITION(ngraph::pass::NopElimination, "NopElimination", 0); + // we need to know data and indices shape to infer if gather is Nop + if (data.get_partial_shape().is_dynamic() || indices.get_partial_shape().is_dynamic()) { + return false; + } + // if rank of data and gather output dont match, we will skip + if (data.get_shape().size() != node->get_shape().size()) { + return false; + } + + auto axis = gather->get_axis(); + if (axis == opset3::Gather::AXIS_NOT_SET_VALUE) { + NGRAPH_DEBUG << "axis value not set"; + return false; + } + + // case_1 : if the input tensor is of shape (4, 1, 4) + // and axis = 1, then the gather would be simply + // 
gathering the whole input tensor, so we can optimize this + // op has Nop + + if (data.get_shape()[axis] == 1 && data.get_shape() == node->get_shape()) { + return replace_output_update_name(gather->output(0), gather->input_value(0)); + } + + // case_2 : if the input tensor is of shape (4, 3, 4) + // we need to check the contents of indices, if indices + // is 1D tensor of value {0, 1, 2}, we can optimize this + // op has Nop + + // check if the indices is constant + auto constant_indices = + as_type_ptr(gather->input_value(1).get_node_shared_ptr()); + if (!constant_indices) { + return false; + } else { + // if ref_inidices == indices, we are capturing the + // entire input tensor + std::vector ref_indices(data.get_shape()[axis], 0); + std::iota(ref_indices.begin(), ref_indices.end(), 0); + if (ref_indices == constant_indices->cast_vector()) { + return replace_output_update_name(gather->output(0), gather->input_value(0)); + } + } + } + return false; +} static bool eliminate_nop(const std::shared_ptr& node) { // skip if shapes are dynamic @@ -35,34 +85,6 @@ static bool eliminate_nop(const std::shared_ptr& node) { return false; } -static bool eliminate_convert(const std::shared_ptr& node) { - bool is_out_type_agnostic = false; - static const std::set type_agnostic{TI(opset3::NonZero)}; - if (node->output(0).get_target_inputs().size() == 1) { - Input out = *node->output(0).get_target_inputs().begin(); - is_out_type_agnostic = type_agnostic.count(out.get_node()->get_type_info()) == 1; - } - auto convert = as_type_ptr(node); - auto input = convert->input_value(0); - if (convert->get_convert_element_type() == input.get_element_type() || is_out_type_agnostic) { - if (is_out_type_agnostic && is_type(input.get_node())) { - input = input.get_node()->input_value(0); - } - return replace_output_update_name(node->output(0), input); - } - return false; -} - -static bool eliminate_concat(const std::shared_ptr& node) { - auto node_input = node->input_value(0); - - // remove concat with single input - if (node->get_input_size() == 1) { - return replace_output_update_name(node->output(0), node_input); - } - return false; -} - static bool eliminate_reshape_v1(const std::shared_ptr& node) { auto input = node->input_value(0); // check if reshape is not identity op @@ -336,20 +358,168 @@ NAME() { \ }; \ NGRAPH_RTTI_DEFINITION(NAME, STR(NAME), 0); -SIMPLE_MATCHER_PASS_DEFINITION(EliminatePad, opset3::Pad, eliminate_nop); -SIMPLE_MATCHER_PASS_DEFINITION(EliminateConvert, opset3::Convert, eliminate_convert); SIMPLE_MATCHER_PASS_DEFINITION(EliminateReshape, opset3::Reshape, eliminate_reshape_v1); -SIMPLE_MATCHER_PASS_DEFINITION(EliminateConcat, opset3::Concat, eliminate_concat); SIMPLE_MATCHER_PASS_DEFINITION(EliminateSqueeze, opset3::Squeeze, eliminate_squeeze); SIMPLE_MATCHER_PASS_DEFINITION(EliminateUnsqueeze, opset3::Unsqueeze, eliminate_unsqueeze); SIMPLE_MATCHER_PASS_DEFINITION(EliminateBroadcast, op::v1::Broadcast, eliminate_nop); +SIMPLE_MATCHER_PASS_DEFINITION(EliminateGather, opset3::Gather, simplify_gather); -ngraph::pass::NopElimination::NopElimination() { + +NGRAPH_RTTI_DEFINITION(pass::EliminatePad, "EliminatePad", 0); + +pass::EliminatePad::EliminatePad() { + MATCHER_SCOPE(EliminatePad); + auto pad_node_pattern = pattern::wrap_type(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto pad = m.get_match_root(); + + auto pad_begin_const = ngraph::get_constant_from_source(pad->input_value(1)); + auto pad_end_const = ngraph::get_constant_from_source(pad->input_value(2)); + + if 
(!pad_begin_const || !pad_end_const) { + return false; + } + + const auto pad_begin_value = pad_begin_const->cast_vector(); + const auto pad_end_value = pad_end_const->cast_vector(); + + if (std::any_of(pad_begin_value.begin(), pad_begin_value.end(), [](int64_t value) { return value != 0; }) || + std::any_of(pad_end_value.begin(), pad_end_value.end(), [](int64_t value) { return value != 0; })) { + return false; + } + + return replace_output_update_name(pad->output(0), pad->input_value(0)); + }; + + auto m = std::make_shared(pad_node_pattern, matcher_name); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(pass::EliminateConvert, "EliminateConvert", 0); + +pass::EliminateConvert::EliminateConvert() { + MATCHER_SCOPE(EliminateConvert); + auto convert_pattern = pattern::wrap_type(); + + matcher_pass_callback callback = [](pattern::Matcher& m) { + auto convert = std::dynamic_pointer_cast(m.get_match_root()); + if (!convert) { + return false; + } + if (convert->get_input_element_type(0) == convert->get_element_type()) { + return replace_output_update_name(convert->output(0), convert->input_value(0)); + } + return false; + }; + + auto m = std::make_shared(convert_pattern, matcher_name); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(pass::EliminateConvertNonZero, "EliminateConvertNonZero", 0); + +pass::EliminateConvertNonZero::EliminateConvertNonZero() { + MATCHER_SCOPE(EliminateConvertNonZero); + auto convert_pattern = pattern::wrap_type(pattern::consumers_count(1)); + auto non_zero = pattern::wrap_type({convert_pattern}); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto & pattern_map = m.get_pattern_map(); + auto convert = pattern_map.at(convert_pattern); + // remove convert + convert->output(0).replace(convert->input_value(0)); + // to make this elimination recursive we register NonZero as a node which will be used to repeat matching + register_new_node(m.get_match_root()); + return true; + }; + + auto m = std::make_shared(non_zero, matcher_name); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(pass::EliminateConcat, "EliminateConcat", 0); + +pass::EliminateConcat::EliminateConcat() { + MATCHER_SCOPE(EliminateConcat); + auto convert_pattern = pattern::wrap_type(); + + matcher_pass_callback callback = [](pattern::Matcher& m) { + auto concat = m.get_match_root(); + if (concat->inputs().size() == 1) { + return replace_output_update_name(concat->output(0), concat->input_value(0)); + } + return false; + }; + + auto m = std::make_shared(convert_pattern, matcher_name); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(pass::EliminateSplit, "EliminateSplit", 0); + +pass::EliminateSplit::EliminateSplit() { + MATCHER_SCOPE(EliminateSplit); + auto convert_pattern = pattern::wrap_type(); + + matcher_pass_callback callback = [](pattern::Matcher& m) { + auto split = std::dynamic_pointer_cast(m.get_match_root()); + if (!split || split->get_num_splits() != 1) { + return false; + } + return replace_output_update_name(split->output(0), split->input_value(0)); + }; + + auto m = std::make_shared(convert_pattern, matcher_name); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(pass::EliminateTranspose, "EliminateTranspose", 0); + +pass::EliminateTranspose::EliminateTranspose() { + MATCHER_SCOPE(EliminateTranspose); + auto order = pattern::wrap_type(); + auto transpose_pattern = pattern::wrap_type({pattern::any_input(), order}); + + matcher_pass_callback callback = [=](pattern::Matcher& m) {
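// Editor's sketch (illustration only, not part of this patch): this matcher removes a Transpose whose
// order is the identity permutation. Assuming opset7 and the usual ngraph headers, such a no-op
// Transpose could be built as follows and would be folded to its input by EliminateTranspose:
auto data = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32, ngraph::Shape{2, 3, 4});
auto identity_order = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{3}, {0, 1, 2});
auto nop_transpose = std::make_shared<ngraph::opset7::Transpose>(data, identity_order);
// After the pass runs, consumers of nop_transpose are reconnected directly to data.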
+ const auto & pattern_map = m.get_pattern_map(); + auto order_const = std::dynamic_pointer_cast(pattern_map.at(order)); + if (!order_const) { + return false; + } + + const auto & order_values = order_const->cast_vector(); + vector ref_values(order_values.size()); + std::iota(ref_values.begin(), ref_values.end(), 0); + if (order_values != ref_values) { + return false; + } + + auto transpose = m.get_match_root(); + return replace_output_update_name(transpose->output(0), transpose->input_value(0)); + }; + + auto m = std::make_shared(transpose_pattern, matcher_name); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(ngraph::pass::NopElimination, "NopElimination", 0); + +ngraph::pass::NopElimination::NopElimination(bool use_shape_for_elimination) { + // shape-agnostic transformations add_matcher(); add_matcher(); - add_matcher(); + add_matcher(); add_matcher(); - add_matcher(); - add_matcher(); - add_matcher(); + add_matcher(); + add_matcher(); + + // shape-dependent transformations + if (use_shape_for_elimination) { + add_matcher(); + add_matcher(); + add_matcher(); + add_matcher(); + add_matcher(); + } } \ No newline at end of file diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/pad_fusion.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/pad_fusion.cpp index b2655f8797c..30ffdf934c8 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/pad_fusion.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/pad_fusion.cpp @@ -386,34 +386,3 @@ pass::PadFusionGroupConvolutionBackpropData::PadFusionGroupConvolutionBackpropDa auto m = std::make_shared(conv_pattern, matcher_name); this->register_matcher(m, callback); } - -NGRAPH_RTTI_DEFINITION(pass::PadElimination, "PadElimination", 0); - -pass::PadElimination::PadElimination() { - MATCHER_SCOPE(PadElimination); - auto pad_node_pattern = pattern::wrap_type(); - - matcher_pass_callback callback = [=](pattern::Matcher& m) { - auto pad = m.get_match_root(); - - auto pad_begin_const = ngraph::get_constant_from_source(pad->input_value(1)); - auto pad_end_const = ngraph::get_constant_from_source(pad->input_value(2)); - - if (!pad_begin_const || !pad_end_const) { - return false; - } - - const auto pad_begin_value = pad_begin_const->cast_vector(); - const auto pad_end_value = pad_end_const->cast_vector(); - - if (std::any_of(pad_begin_value.begin(), pad_begin_value.end(), [](int64_t value) { return value != 0; }) || - std::any_of(pad_end_value.begin(), pad_end_value.end(), [](int64_t value) { return value != 0; })) { - return false; - } - - return replace_output_update_name(pad->output(0), pad->input_value(0)); - }; - - auto m = std::make_shared(pad_node_pattern, matcher_name); - this->register_matcher(m, callback); -} \ No newline at end of file diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp index ec4614241b9..244670d3678 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp @@ -7,6 +7,7 @@ #include "itt.hpp" #include +#include #include #include #include @@ -121,16 +122,85 @@ 
ngraph::pass::GatherNopElimination::GatherNopElimination() { this->register_matcher(m, callback); } +NGRAPH_RTTI_DEFINITION(ngraph::pass::SimplifyGatherShapeOf, "SimplifyGatherShapeOf", 0); + +ngraph::pass::SimplifyGatherShapeOf::SimplifyGatherShapeOf() { + MATCHER_SCOPE(SimplifyGatherShapeOf); + const auto gather_pattern = ngraph::pattern::wrap_type(); + const auto shape_of_pattern = ngraph::pattern::wrap_type({gather_pattern}); + + ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) { + auto node = m.get_match_root(); + auto gather = as_type_ptr(node->input_value(0).get_node_shared_ptr()); + if (!gather) { + return false; + } + auto gather_in_rank = gather->get_input_partial_shape(0).rank(); + auto indices_rank = gather->get_input_partial_shape(1).rank(); + auto axis = gather->get_axis(); + if (gather_in_rank.is_dynamic() || indices_rank.is_dynamic() || + axis == opset3::Gather::AXIS_NOT_SET_VALUE) { + return false; + } + + auto zero_axis = opset3::Constant::create(element::i64, Shape{}, {0}); + NodeVector new_ops; + auto new_shapeof = std::make_shared(gather->input_value(0), node->get_output_element_type(0)); + new_ops.push_back(new_shapeof); + std::shared_ptr replace_op; + if (indices_rank.get_length() == 0) { + std::vector vi(gather_in_rank.get_length()); + std::iota(vi.begin(), vi.end(), 0); + vi.erase(vi.begin() + axis); + auto new_indices = opset3::Constant::create(element::i64, Shape{vi.size()}, vi); + replace_op = std::make_shared(new_shapeof, new_indices, zero_axis); + new_ops.push_back(replace_op); + } else { + NodeVector concat_inputs; + if (axis > 0) { + std::vector vi(axis); + std::iota(vi.begin(), vi.end(), 0); + auto indices = opset3::Constant::create(element::i64, Shape{vi.size()}, vi); + auto new_gather = std::make_shared(new_shapeof, indices, zero_axis); + new_ops.push_back(new_gather); + concat_inputs.push_back(new_gather); + } + auto shapeof_indices = std::make_shared(gather->input_value(1), node->get_output_element_type(0)); + new_ops.push_back(shapeof_indices); + + concat_inputs.push_back(shapeof_indices); + + if (gather_in_rank.get_length() - 1 > axis) { + std::vector vi(gather_in_rank.get_length() - (axis + 1)); + std::iota(vi.begin(), vi.end(), axis + 1); + auto indices = opset3::Constant::create(element::i64, Shape{vi.size()}, vi); + auto new_gather = std::make_shared(new_shapeof, indices, zero_axis); + new_ops.push_back(new_gather); + concat_inputs.push_back(new_gather); + } + replace_op = std::make_shared(concat_inputs, 0); + new_ops.push_back(replace_op); + } + replace_op->set_friendly_name(node->get_friendly_name()); + copy_runtime_info(node, new_ops); + replace_node(node, replace_op); + return true; + }; + + auto m = std::make_shared(shape_of_pattern, matcher_name); + this->register_matcher(m, callback); +} NGRAPH_RTTI_DEFINITION(ngraph::pass::SimplifyShapeOfSubGraph, "SimplifyShapeOfSubGraph", 0); bool ngraph::pass::SimplifyShapeOfSubGraph::run_on_function(std::shared_ptr f) { - RUN_ON_FUNCTION_SCOPE(GroupedGatherElimination); + RUN_ON_FUNCTION_SCOPE(SimplifyShapeOfSubGraph); ngraph::pass::Manager manager; manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.run_passes(f); return false; } diff --git a/inference-engine/src/transformations/src/transformations/low_precision/disable_convert_constant_folding_on_const_path.cpp b/inference-engine/src/transformations/src/transformations/low_precision/disable_convert_constant_folding_on_const_path.cpp index 
d5c30e73e4f..44d05860c4f 100644 --- a/inference-engine/src/transformations/src/transformations/low_precision/disable_convert_constant_folding_on_const_path.cpp +++ b/inference-engine/src/transformations/src/transformations/low_precision/disable_convert_constant_folding_on_const_path.cpp @@ -20,7 +20,7 @@ using namespace ngraph; NGRAPH_RTTI_DEFINITION(ngraph::pass::DisableConvertConstantFoldingOnConstPath, "DisableConvertConstantFoldingOnConstPath", 0); ngraph::pass::DisableConvertConstantFoldingOnConstPath::DisableConvertConstantFoldingOnConstPath( - const std::vector& inputPrecisions) { + const element::TypeVector & inputPrecisions) { auto matcherData = ngraph::pattern::any_input(); auto matcherConvert = ngraph::pattern::wrap_type({ matcherData }, pattern::consumers_count(1)); diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp new file mode 100644 index 00000000000..34163fc4860 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp @@ -0,0 +1,66 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include +#include + +#include +#include +#include + +#include +#include + +#include "ngraph_ops/nms_static_shape_ie.hpp" +#include "transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMatrixNmsToMatrixNmsIE, "ConvertMatrixNmsToMatrixNmsIE", 0); + +ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE() { + MATCHER_SCOPE(ConvertMatrixNmsToMatrixNmsIE); + auto nms = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) { + auto nms = std::dynamic_pointer_cast(m.get_match_root()); + if (!nms) { + return false; + } + + const auto new_args = nms->input_values(); + // vector of new nGraph operations + NodeVector new_ops; + auto attrs = nms->get_attrs(); + attrs.output_type = element::i32; + auto nms_new = std::make_shared>( + new_args.at(0), + new_args.at(1), + attrs); + new_ops.emplace_back(nms_new); + + Output output_0 = nms_new->output(0); + Output output_1 = nms_new->output(1); + Output output_2 = nms_new->output(2); + + if (nms->output(1).get_element_type() != output_1.get_element_type()) { + output_1 = std::make_shared(output_1, nms->output(1).get_element_type()); + output_1.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.1"); + new_ops.emplace_back(output_1.get_node_shared_ptr()); + } + + if (nms->output(2).get_element_type() != output_2.get_element_type()) { + output_2 = std::make_shared(output_2, nms->output(2).get_element_type()); + output_2.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.2"); + new_ops.emplace_back(output_2.get_node_shared_ptr()); + } + + nms_new->set_friendly_name(nms->get_friendly_name()); + ngraph::copy_runtime_info(nms, new_ops); + ngraph::replace_node(nms, {output_0, output_1, output_2}); + return true; + }; + + auto m = std::make_shared(nms, matcher_name); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp new file mode 
100644 index 00000000000..1f236610e53 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp @@ -0,0 +1,67 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include +#include + +#include +#include +#include + +#include +#include + +#include "ngraph_ops/nms_static_shape_ie.hpp" +#include "transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE, "ConvertMulticlassNmsToMulticlassNmsIE", 0); + +ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE::ConvertMulticlassNmsToMulticlassNmsIE() { + MATCHER_SCOPE(ConvertMulticlassNmsToMulticlassNmsIE); + auto nms = ngraph::pattern::wrap_type(); + + ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) { + auto nms = std::dynamic_pointer_cast(m.get_match_root()); + if (!nms) { + return false; + } + + const auto new_args = nms->input_values(); + // vector of new nGraph operations + NodeVector new_ops; + auto attrs = nms->get_attrs(); + attrs.output_type = element::i32; + + auto nms_new = std::make_shared>( + new_args.at(0), + new_args.at(1), + attrs); + new_ops.emplace_back(nms_new); + + Output output_0 = nms_new->output(0); + Output output_1 = nms_new->output(1); + Output output_2 = nms_new->output(2); + + if (nms->output(1).get_element_type() != output_1.get_element_type()) { + output_1 = std::make_shared(output_1, nms->output(1).get_element_type()); + output_1.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.1"); + new_ops.emplace_back(output_1.get_node_shared_ptr()); + } + + if (nms->output(2).get_element_type() != output_2.get_element_type()) { + output_2 = std::make_shared(output_2, nms->output(2).get_element_type()); + output_2.get_node_shared_ptr()->set_friendly_name(nms->get_friendly_name() + "/convert.2"); + new_ops.emplace_back(output_2.get_node_shared_ptr()); + } + + nms_new->set_friendly_name(nms->get_friendly_name()); + ngraph::copy_runtime_info(nms, new_ops); + ngraph::replace_node(nms, {output_0, output_1, output_2}); + return true; + }; + + auto m = std::make_shared(nms, matcher_name); + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp index 40ae55c0f90..69e57b69547 100644 --- a/inference-engine/src/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp +++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertTensorIteratorToLSTMSequence, "ConvertTensorIteratorToLSTMSequence", 0); diff --git a/inference-engine/src/transformations/src/transformations/rt_info/disable_constant_folding.cpp b/inference-engine/src/transformations/src/transformations/rt_info/disable_constant_folding.cpp new file mode 100644 index 00000000000..791102ed1f4 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/rt_info/disable_constant_folding.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/rt_info/disable_constant_folding.hpp" + +template class ngraph::VariantImpl; + +constexpr 
ngraph::VariantTypeInfo ngraph::VariantWrapper::type_info; + +void ngraph::disable_constant_folding(const std::shared_ptr& node) { + auto & rt_info = node->get_rt_info(); + rt_info[VariantWrapper::type_info.name] = make_variant({}); +} \ No newline at end of file diff --git a/inference-engine/src/transformations/src/transformations/serialize.cpp b/inference-engine/src/transformations/src/transformations/serialize.cpp index 93f9c24e4b8..0ce92c208ea 100644 --- a/inference-engine/src/transformations/src/transformations/serialize.cpp +++ b/inference-engine/src/transformations/src/transformations/serialize.cpp @@ -642,8 +642,6 @@ bool resolve_dynamic_shapes(const ngraph::Function& f) { [](const Dimension& d) -> Dimension { return d.get_max_length(); }); - NGRAPH_CHECK(PartialShape(out_shape).is_static(), - "Dynamic dimension cannot be resolved in ", op); return out_shape; }; @@ -685,6 +683,7 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, create_layer_ids(f); std::unordered_set unique_names; + // TODO remove resolve_dynamic_shapes function completely when support for -1 will be implemented in the MO bool has_dynamic_shapes = resolve_dynamic_shapes(f); const bool exec_graph = is_exec_graph(f); @@ -711,9 +710,6 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, if (node->get_input_size() > 0) { pugi::xml_node input = layer.append_child("input"); for (const auto & i : node->inputs()) { - NGRAPH_CHECK(i.get_partial_shape().is_static(), - "Unsupported dynamic input shape in ", node); - // WA for LSTMCellv0, peephole input shall not be serialized if (i.get_index() == 6 && dynamic_cast(node)) { port_id++; @@ -724,10 +720,14 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, port.append_attribute("id").set_value(port_id++); port.append_attribute("precision") .set_value(get_precision_name(i.get_element_type()).c_str()); - for (auto d : i.get_shape()) { + for (auto d : i.get_partial_shape()) { pugi::xml_node dim = port.append_child("dim"); - dim.append_child(pugi::xml_node_type::node_pcdata) - .set_value(std::to_string(d).c_str()); + if (d.is_dynamic()) { + dim.append_child(pugi::xml_node_type::node_pcdata).set_value("-1"); + } else { + dim.append_child(pugi::xml_node_type::node_pcdata) + .set_value(std::to_string(d.get_length()).c_str()); + } } } @@ -739,9 +739,6 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, if ((node->get_output_size() > 0) && !ngraph::op::is_output(node)) { pugi::xml_node output = layer.append_child("output"); for (const auto & o : node->outputs()) { - NGRAPH_CHECK(o.get_partial_shape().is_static(), - "Unsupported dynamic output shape in ", node); - pugi::xml_node port = output.append_child("port"); port.append_attribute("id").set_value(port_id++); port.append_attribute("precision") @@ -762,10 +759,14 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, port.append_attribute("names").set_value(names.c_str()); } - for (auto d : o.get_shape()) { + for (auto d : o.get_partial_shape()) { pugi::xml_node dim = port.append_child("dim"); - dim.append_child(pugi::xml_node_type::node_pcdata) - .set_value(std::to_string(d).c_str()); + if (d.is_dynamic()) { + dim.append_child(pugi::xml_node_type::node_pcdata).set_value("-1"); + } else { + dim.append_child(pugi::xml_node_type::node_pcdata) + .set_value(std::to_string(d.get_length()).c_str()); + } } } if (node_type_name == "TensorIterator" || node_type_name == "Loop") { @@ -851,7 +852,7 @@ bool pass::Serialize::run_on_function(std::shared_ptr f) { try { serializeFunc(xml_file, bin_file); - } catch (const ngraph::CheckFailure& e) { + } catch 
(const ngraph::CheckFailure&) { // optimization decision was made to create .bin file upfront and // write to it directly instead of buffering its content in memory, // hence we need to delete it here in case of failure diff --git a/inference-engine/src/vpu/common/CMakeLists.txt b/inference-engine/src/vpu/common/CMakeLists.txt index 71c727b631a..d8b55be4825 100644 --- a/inference-engine/src/vpu/common/CMakeLists.txt +++ b/inference-engine/src/vpu/common/CMakeLists.txt @@ -15,7 +15,7 @@ function(add_common_target TARGET_NAME STATIC_IE) UNITY ) - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if(CMAKE_COMPILER_IS_GNUCXX) # TODO: enable some day and fix all warnings # target_compile_options(${TARGET_NAME} PRIVATE "-Wall") target_compile_options(${TARGET_NAME} PRIVATE "-Werror=unused-function") diff --git a/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp b/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp index 745613c977e..ada40a74d84 100644 --- a/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp +++ b/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp @@ -11,24 +11,33 @@ namespace vpu { -template class Map> -inline std::vector getKeys(const Map& map) { +template