Merge remote-tracking branch 'github/master' into auto-batch-master
# Conflicts:
#	inference-engine/src/CMakeLists.txt
commit d17b4acbf7
@@ -117,9 +117,9 @@ jobs:
 # For running Python API tests
 python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt
 # For running PaddlePaddle frontend unit tests
-python3 -m pip install -r $(REPO_DIR)/ngraph/test/frontend/paddlepaddle/requirements_dev.txt
+python3 -m pip install -r $(REPO_DIR)/src/core/tests/frontend/paddlepaddle/requirements_dev.txt
 # For running ONNX frontend unit tests
-python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test_onnx.txt
+python3 -m pip install -r $(REPO_DIR)/src/core/tests/requirements_test_onnx.txt
 # For MO unit tests
 python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements.txt
 python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements_dev.txt
@@ -86,9 +86,9 @@ jobs:
 # For running Python API tests
 python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt
 # For running PaddlePaddle frontend unit tests
-python3 -m pip install -r $(REPO_DIR)/ngraph/test/frontend/paddlepaddle/requirements_dev.txt
+python3 -m pip install -r $(REPO_DIR)/src/core/tests/frontend/paddlepaddle/requirements_dev.txt
 # For running ONNX frontend unit tests
-python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test_onnx.txt
+python3 -m pip install -r $(REPO_DIR)/src/core/tests/requirements_test_onnx.txt
 # For MO unit tests
 python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements.txt
 python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements_dev.txt
@@ -79,7 +79,7 @@ jobs:
 workingDirectory: $(WORK_DIR)
 displayName: 'Install dependencies'

-- script: runtime/bindings/python/tests/test_onnx/model_zoo_preprocess.sh -d $(MODELS_DIR)/models_data -o -s "$(ONNX_MODEL_ZOO_SHA)"
+- script: src/bindings/python/tests/test_onnx/model_zoo_preprocess.sh -d $(MODELS_DIR)/models_data -o -s "$(ONNX_MODEL_ZOO_SHA)"
 displayName: 'Update models'
 condition: ne(variables['BUILD_TYPE'], 'Debug')
@@ -84,7 +84,7 @@ jobs:
 - script: |
 brew install cython
 brew install automake
-python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test_onnx.txt
+python3 -m pip install -r $(REPO_DIR)/src/core/tests/requirements_test_onnx.txt
 # Speed up build
 brew install ninja
 # Speed up tests
@@ -110,9 +110,9 @@ jobs:
 rem For running Python API tests
 python -m pip install -r $(REPO_DIR)\inference-engine\ie_bridges\python\src\requirements-dev.txt
 rem For running PaddlePaddle frontend unit tests
-python -m pip install -r $(REPO_DIR)\ngraph\test\frontend\paddlepaddle\requirements_dev.txt
+python -m pip install -r $(REPO_DIR)\src\core\tests\frontend\paddlepaddle\requirements_dev.txt
 rem For running ONNX frontend unit tests
-python -m pip install -r $(REPO_DIR)\ngraph\test\requirements_test_onnx.txt
+python -m pip install -r $(REPO_DIR)\src\core\tests\requirements_test_onnx.txt
 rem For MO unit tests
 python -m pip install -r $(REPO_DIR)\model-optimizer\requirements.txt
 python -m pip install -r $(REPO_DIR)\model-optimizer\requirements_dev.txt
@@ -132,7 +132,7 @@ jobs:

 - script: |
 set PATH=$(WORK_DIR)\ninja-win;%PATH%
-call "$(MSVS_VARS_PATH)" && $(CMAKE_CMD) -GNinja -DENABLE_ONEDNN_FOR_GPU=OFF -DENABLE_GNA=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_CLDNN=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_GAPI_PREPROCESSING=$(CMAKE_BUILD_SHARED_LIBS) -DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_REQUIREMENTS_INSTALL=OFF -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.7.6\x64\python.exe" -DPYTHON_INCLUDE_DIR="C:\hostedtoolcache\windows\Python\3.7.6\x64\include" -DPYTHON_LIBRARY="C:\hostedtoolcache\windows\Python\3.7.6\x64\libs\python37.lib" -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
+call "$(MSVS_VARS_PATH)" && $(CMAKE_CMD) -G "Ninja Multi-Config" -DENABLE_ONEDNN_FOR_GPU=OFF -DENABLE_GNA=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_CLDNN=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_GAPI_PREPROCESSING=$(CMAKE_BUILD_SHARED_LIBS) -DBUILD_SHARED_LIBS=$(CMAKE_BUILD_SHARED_LIBS) -DENABLE_REQUIREMENTS_INSTALL=OFF -DENABLE_FASTER_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DENABLE_PYTHON=ON -DPYTHON_EXECUTABLE="C:\hostedtoolcache\windows\Python\3.7.6\x64\python.exe" -DPYTHON_INCLUDE_DIR="C:\hostedtoolcache\windows\Python\3.7.6\x64\include" -DPYTHON_LIBRARY="C:\hostedtoolcache\windows\Python\3.7.6\x64\libs\python37.lib" -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
 workingDirectory: $(BUILD_DIR)
 displayName: 'CMake'
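Note: the change above switches the Windows CI to CMake's "Ninja Multi-Config" generator, so the configuration is selected per build invocation rather than baked in at configure time. A minimal sketch of how a CMake script can detect this mode (not part of the commit; the message text is illustrative):

    # GENERATOR_IS_MULTI_CONFIG is a standard CMake global property (3.9+).
    get_property(is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
    if(is_multi_config)
        # The configuration is chosen at build time, e.g.:
        #   cmake --build <build-dir> --config Release
        message(STATUS "Multi-config generator; pass --config when building")
    endif()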
@@ -210,12 +210,10 @@ jobs:
 - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\paddlepaddle_tests --gtest_print_time=1 --gtest_output=xml:TEST-PaddlePaddle.xml
 displayName: 'PaddlePaddle Frontend UT'
 continueOnError: false
-condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')

 - script: call $(SETUPVARS) && $(INSTALL_TEST_DIR)\tensorflow_tests --gtest_print_time=1 --gtest_output=xml:TEST-Tensorflow.xml
 displayName: 'Tensorflow Frontend UT'
 continueOnError: false
-condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')

 - script: |
 set PATH=$(IB_DIR);%PATH%
@@ -72,7 +72,7 @@ RUN cmake .. \
 RUN make -j $(nproc) install

 # Run tests via tox
-WORKDIR /openvino/runtime/bindings/python
+WORKDIR /openvino/src/bindings/python
 ENV OpenVINO_DIR=/openvino/dist/runtime/cmake
 ENV LD_LIBRARY_PATH=/openvino/dist/runtime/lib:/openvino/dist/runtime/3rdparty/tbb/lib
 ENV PYTHONPATH=/openvino/bin/intel64/${BUILD_TYPE}/lib/python_api/python3.8:${PYTHONPATH}
.ci/openvino-onnx/Jenkinsfile: 2 changed lines (vendored)
@@ -93,7 +93,7 @@ def prepare_repository(String workdir) {

 def updateModels() {
 sh """
-./runtime/bindings/python/tests/test_onnx/model_zoo_preprocess.sh -d ${HOME}/ONNX_CI/models_data -o -s ${ONNX_MODEL_ZOO_SHA}
+./src/bindings/python/tests/test_onnx/model_zoo_preprocess.sh -d ${HOME}/ONNX_CI/models_data -o -s ${ONNX_MODEL_ZOO_SHA}
 """
 }
.github/dependabot.yml: 2 changed lines (vendored)
@@ -4,7 +4,7 @@ version: 2
 updates:
 # Enable version updates for nGraph Python API
 - package-ecosystem: pip
-  directory: "/runtime/bindings/python"
+  directory: "/src/bindings/python"
   schedule:
     interval: weekly
     day: monday
.gitmodules: 4 changed lines (vendored)
@@ -44,8 +44,8 @@
 [submodule "thirdparty/protobuf"]
 	path = thirdparty/protobuf/protobuf
 	url = https://github.com/protocolbuffers/protobuf.git
-[submodule "runtime/bindings/python/thirdparty/pybind11"]
-	path = runtime/bindings/python/thirdparty/pybind11
+[submodule "src/bindings/python/thirdparty/pybind11"]
+	path = src/bindings/python/thirdparty/pybind11
 	url = https://github.com/pybind/pybind11.git
 [submodule "thirdparty/ittapi/ittapi"]
 	path = thirdparty/ittapi/ittapi
@@ -32,10 +32,12 @@ endif()

 # resolving dependencies for the project
 message (STATUS "PROJECT ............................... " ${PROJECT_NAME})
 message (STATUS "CMAKE_VERSION ......................... " ${CMAKE_VERSION})
+message (STATUS "CMAKE_BINARY_DIR ...................... " ${CMAKE_BINARY_DIR})
+message (STATUS "OpenVINO_SOURCE_DIR ................... " ${OpenVINO_SOURCE_DIR})
 message (STATUS "CMAKE_GENERATOR ....................... " ${CMAKE_GENERATOR})
 message (STATUS "CMAKE_C_COMPILER_ID ................... " ${CMAKE_C_COMPILER_ID})
 message (STATUS "CMAKE_CXX_COMPILER_ID ................. " ${CMAKE_CXX_COMPILER_ID})
 message (STATUS "CMAKE_BUILD_TYPE ...................... " ${CMAKE_BUILD_TYPE})
 message (STATUS "CMAKE_TOOLCHAIN_FILE .................. " ${CMAKE_TOOLCHAIN_FILE})
@@ -91,11 +93,11 @@ ie_cpack_add_component(ngraph_dev REQUIRED DEPENDS ngraph)
 include(cmake/test_model_zoo.cmake)

 add_subdirectory(thirdparty)
-add_subdirectory(openvino)
-add_subdirectory(ngraph)
-add_subdirectory(inference-engine)
-add_subdirectory(runtime)
+add_subdirectory(inference-engine/thirdparty)
+add_subdirectory(inference-engine/src)
+add_subdirectory(src)
 add_subdirectory(samples)
+add_subdirectory(inference-engine)
 include(cmake/extra_modules.cmake)
 add_subdirectory(model-optimizer)
 add_subdirectory(docs)
CODEOWNERS: 23 changed lines
@@ -19,24 +19,26 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins
 # IE Core:
 /inference-engine/ @openvinotoolkit/openvino-ie-maintainers
 /inference-engine/ie_bridges/python @openvinotoolkit/openvino-ie-python-api-maintainers
-/inference-engine/src/transformations/ @GlebKazantaev @ilyachur
-/inference-engine/src/legacy_api/ @openvinotoolkit/openvino-ngraph-maintainers
-/inference-engine/src/readers/ @openvinotoolkit/openvino-ngraph-maintainers
+/src/common/transformations/ @GlebKazantaev @ilyachur
+/src/common/legacy/ @openvinotoolkit/openvino-ngraph-maintainers
+/src/common/ @openvinotoolkit/openvino-ie-maintainers
 /inference-engine/tests_deprecated/readers/ @openvinotoolkit/openvino-ngraph-maintainers

 # IE CPU:
 /inference-engine/src/mkldnn_plugin/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers
-/inference-engine/src/low_precision_transformations/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers
+/src/common/low_precision_transformations/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers
 /inference-engine/thirdparty/mkl-dnn/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers

 # IE GPU:
 /inference-engine/src/cldnn_engine/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
-/inference-engine/src/inference_engine/include/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
-/inference-engine/src/inference_engine/include/cldnn/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
+/src/inference/include/ie/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
+/src/inference/include/ie/cldnn/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
+/src/inference/include/openvino/runtime/gpu/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers
 /inference-engine/thirdparty/clDNN/ @openvinotoolkit/openvino-ie-gpu-maintainers @openvinotoolkit/openvino-ie-gpu-developers

 # IE VPU:
 /inference-engine/src/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers
-/inference-engine/src/inference_engine/include/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers
+/src/inference/include/ie/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers
 /inference-engine/thirdparty/movidius/ @openvinotoolkit/openvino-ie-vpu-maintainers
 /inference-engine/tests_deprecated/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
 /inference-engine/tests_deprecated/functional/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
@@ -48,11 +50,11 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins

 # IE GNA:
 /inference-engine/src/gna_plugin/ @openvinotoolkit/openvino-ie-gna-maintainers
-/inference-engine/src/inference_engine/include/gna/ @openvinotoolkit/openvino-ie-gna-maintainers
+/src/inference/include/ie/gna/ @openvinotoolkit/openvino-ie-gna-maintainers

 # IE MULTI:
 /inference-engine/src/multi_device/ @openvinotoolkit/openvino-ie-multi-maintainers
-/inference-engine/src/inference_engine/include/multi-device/ @openvinotoolkit/openvino-ie-multi-maintainers
+/src/inference/include/ie/multi-device/ @openvinotoolkit/openvino-ie-multi-maintainers

 # IE Tests:
 /inference-engine/tests/ @openvinotoolkit/openvino-ie-tests-maintainers
@@ -64,7 +66,8 @@ azure-pipelines.yml @openvinotoolkit/openvino-admins
 /model-optimizer/ @openvinotoolkit/openvino-mo-maintainers

 # nGraph:
-/ngraph/ @openvinotoolkit/openvino-ngraph-maintainers
+/src/core/ @openvinotoolkit/openvino-ngraph-maintainers
+/src/frontends/ @openvinotoolkit/openvino-ngraph-maintainers

 # POT Tools
 /tools/pot/ @openvinotoolkit/openvino-pot-maintainers
@@ -12,88 +12,82 @@ ie_coverage_capture(INFO_FILE "openvino"

 # Generate reports

-ie_coverage_extract(INPUT "openvino" OUTPUT "inference_engine"
-                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/inference_engine/*"
-                             "${OV_COVERAGE_BASE_DIRECTORY}/plugin_api/*")
+ie_coverage_extract(INPUT "openvino" OUTPUT "inference"
+                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/inference/*")
 ie_coverage_genhtml(INFO_FILE "inference_engine"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

 ie_coverage_extract(INPUT "openvino" OUTPUT "inference_engine_ir_v10_reader"
                     PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/readers/ir_reader/*"
                              "${OV_COVERAGE_BASE_DIRECTORY}/readers/reader_api/*")
 ie_coverage_genhtml(INFO_FILE "inference_engine_ir_v10_reader"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

 ie_coverage_extract(INPUT "openvino" OUTPUT "inference_engine_legacy"
-                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/legacy_api/*")
+                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/common/legacy/*")
 ie_coverage_genhtml(INFO_FILE "inference_engine_legacy"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

-ie_coverage_extract(INPUT "openvino" OUTPUT "hetero_plugin"
-                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/hetero_plugin/*")
-ie_coverage_genhtml(INFO_FILE "hetero_plugin"
+ie_coverage_extract(INPUT "openvino" OUTPUT "ov_hetero_plugin"
+                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/plugins/hetero/*")
+ie_coverage_genhtml(INFO_FILE "ov_hetero_plugin"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

 ie_coverage_extract(INPUT "openvino" OUTPUT "multi_device"
-                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/multi_device/*")
+                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/inference-engine/src/multi_device/*")
 ie_coverage_genhtml(INFO_FILE "multi_device"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

 ie_coverage_extract(INPUT "openvino" OUTPUT "preprocessing"
-                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/preprocessing/*")
+                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}src/common/preprocessing/*")
 ie_coverage_genhtml(INFO_FILE "preprocessing"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

 ie_coverage_extract(INPUT "openvino" OUTPUT "inference_engine_transformations"
-                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/inference_engine_transformations/*")
+                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/common/transformations/*")
 ie_coverage_genhtml(INFO_FILE "inference_engine_transformations"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

 ie_coverage_extract(INPUT "openvino" OUTPUT "inference_engine_snippets"
-                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/snippets/*")
+                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/common/snippets/*")
 ie_coverage_genhtml(INFO_FILE "inference_engine_snippets"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

 ie_coverage_extract(INPUT "openvino" OUTPUT "low_precision_transformations"
-                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/low_precision_transformations/*")
+                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/common/low_precision_transformations/*")
 ie_coverage_genhtml(INFO_FILE "low_precision_transformations"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

 ie_coverage_extract(INPUT "openvino" OUTPUT "template_plugin"
-                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/template_plugin/*")
+                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/docs/template_plugin/*")
 ie_coverage_genhtml(INFO_FILE "template_plugin"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

 if(ENABLE_MKL_DNN)
     ie_coverage_extract(INPUT "openvino" OUTPUT "mkldnn_plugin"
-                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/mkldnn_plugin/*")
+                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/inference-engine/src/mkldnn_plugin/*")
     ie_coverage_genhtml(INFO_FILE "mkldnn_plugin"
                         PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
 endif()

 if(ENABLE_CLDNN)
     ie_coverage_extract(INPUT "openvino" OUTPUT "cldnn_engine"
-                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/cldnn_engine/*")
+                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/inference-engine/src/cldnn_engine/*")
     ie_coverage_genhtml(INFO_FILE "cldnn_engine"
                         PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
 endif()

 if(ENABLE_GNA)
     ie_coverage_extract(INPUT "openvino" OUTPUT "gna_plugin"
-                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/gna_plugin/*")
+                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/inference-engine/src/gna_plugin/*")
     ie_coverage_genhtml(INFO_FILE "gna_plugin"
                         PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
 endif()

-ie_coverage_extract(INPUT "openvino" OUTPUT "ngraph"
-                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/core/*")
-ie_coverage_genhtml(INFO_FILE "ngraph"
+ie_coverage_extract(INPUT "openvino" OUTPUT "core"
+                    PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/core/*")
+ie_coverage_genhtml(INFO_FILE "core"
                     PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")

 if(NGRAPH_ONNX_FRONTEND_ENABLE)
     ie_coverage_extract(INPUT "openvino" OUTPUT "onnx"
-                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/*"
-                                 "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/*")
+                        PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/src/frontends/onnx/*"
+                                 "${OV_COVERAGE_BASE_DIRECTORY}/src/frontends/onnx/*")
     ie_coverage_genhtml(INFO_FILE "onnx"
                         PREFIX "${OV_COVERAGE_BASE_DIRECTORY}")
 endif()
@@ -265,7 +265,7 @@ else()
 reset_deps_cache(OpenCV_DIR)
 endif()

-include(${IE_MAIN_SOURCE_DIR}/cmake/ie_parallel.cmake)
+include(${OpenVINO_SOURCE_DIR}/cmake/ie_parallel.cmake)

 if(ENABLE_GNA)
 reset_deps_cache(
@@ -106,17 +106,11 @@ else()
 set(BIN_FOLDER "bin/${ARCH_FOLDER}")
 endif()

-if(NOT DEFINED CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "")
-    message(STATUS "CMAKE_BUILD_TYPE not defined, 'Release' will be used")
-    set(CMAKE_BUILD_TYPE "Release")
-else()
-    set(RELEASE_TYPES "Debug" "Release" "RelWithDebInfo" "MinSizeRel")
-    list(FIND RELEASE_TYPES ${CMAKE_BUILD_TYPE} INDEX_FOUND)
-    if (INDEX_FOUND EQUAL -1)
-        message(FATAL_ERROR "CMAKE_BUILD_TYPE must be one of Debug, Release, RelWithDebInfo, or MinSizeRel")
-    endif()
-endif()
+set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake build type")
+set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release;Debug;RelWithDebInfo;MinSizeRel")
+if(CMAKE_GENERATOR MATCHES "^Ninja Multi-Config$")
+    set(CMAKE_DEFAULT_BUILD_TYPE "Release" CACHE STRING "CMake default build type")
+endif()
+message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")

 if(USE_BUILD_TYPE_SUBFOLDER)
 set(BIN_FOLDER "${BIN_FOLDER}/${CMAKE_BUILD_TYPE}")
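Note: the replacement above swaps hand-rolled validation for CMake's cached-default idiom: set(... CACHE STRING ...) only takes effect when the user has not already defined CMAKE_BUILD_TYPE, and the STRINGS property populates the value drop-down in cmake-gui/ccmake. A self-contained hedged restatement of the same pattern (not part of the diff):

    # A user-supplied -DCMAKE_BUILD_TYPE=RelWithDebInfo overrides this default.
    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake build type")
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release;Debug;RelWithDebInfo;MinSizeRel")
    message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")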
@@ -152,8 +146,8 @@ endif()
 set(CMAKE_DEBUG_POSTFIX ${IE_DEBUG_POSTFIX})
 set(CMAKE_RELEASE_POSTFIX ${IE_RELEASE_POSTFIX})

-if (MSVC OR CMAKE_GENERATOR STREQUAL "Xcode")
-    # Support CMake multiconfiguration for Visual Studio or Xcode build
+if (OV_GENERATOR_MULTI_CONFIG)
+    # Support CMake multi-configuration for Visual Studio / Ninja or Xcode build
     set(IE_BUILD_POSTFIX $<$<CONFIG:Debug>:${IE_DEBUG_POSTFIX}>$<$<CONFIG:Release>:${IE_RELEASE_POSTFIX}>)
 else ()
     if (CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -162,7 +156,6 @@ else ()
 set(IE_BUILD_POSTFIX ${IE_RELEASE_POSTFIX})
 endif()
-endif()

 add_definitions(-DIE_BUILD_POSTFIX=\"${IE_BUILD_POSTFIX}\")

 if(NOT UNIX)
@@ -235,6 +228,7 @@ include(api_validator/api_validator)

 include(vs_version/vs_version)
 include(plugins/plugins)
+include(frontends/frontends)
 include(add_ie_target)
 include(CMakePackageConfigHelpers)
@@ -107,10 +107,14 @@ function(_ie_add_api_validator_post_build_step)

 foreach(target IN LISTS API_VALIDATOR_TARGETS)
     api_validator_get_target_name()
-    set(output_file "${CMAKE_BINARY_DIR}/api_validator/${target_name}.txt")
+    if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.21 AND OV_GENERATOR_MULTI_CONFIG)
+        set(output_file "${CMAKE_BINARY_DIR}/api_validator/$<CONFIG>/${target_name}.txt")
+    else()
+        set(output_file "${CMAKE_BINARY_DIR}/api_validator/${target_name}.txt")
+    endif()

     add_custom_command(TARGET ${API_VALIDATOR_TARGET} POST_BUILD
-        COMMAND ${CMAKE_COMMAND}
+        COMMAND ${CMAKE_COMMAND} --config $<CONFIG>
         -D UWP_API_VALIDATOR=${UWP_API_VALIDATOR}
         -D UWP_API_VALIDATOR_TARGET=$<TARGET_FILE:${target}>
         -D UWP_API_VALIDATOR_APIS=${UWP_API_VALIDATOR_APIS}
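Note: $<CONFIG> is a generator expression that expands to the active configuration name at build time, which is what makes the per-configuration output path above work under multi-config generators. A minimal runnable sketch (hypothetical target and paths, not from this commit):

    add_executable(demo main.cpp)
    # One marker file per configuration; $<CONFIG> expands when building.
    add_custom_command(TARGET demo POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/report/$<CONFIG>"
        COMMAND ${CMAKE_COMMAND} -E touch "${CMAKE_BINARY_DIR}/report/$<CONFIG>/demo.txt"
        VERBATIM)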
@@ -28,6 +28,8 @@ macro(disable_deprecated_warnings)
 message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}")
 endif()

+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${ie_c_cxx_deprecated}")
+set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${ie_c_cxx_deprecated}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ie_c_cxx_deprecated}")
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ie_c_cxx_deprecated}")
 endmacro()
@@ -42,7 +44,9 @@ macro(ie_deprecated_no_errors)
 set(ie_c_cxx_deprecated_no_errors "/Qdiag-warning:1478,1786")
 elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
     # show 4996 only for /w4
-    set(ie_c_cxx_deprecated_no_errors "/w44996")
+    set(ie_c_cxx_deprecated_no_errors "/wd4996")
+    # WA for VPUX plugin
+    set(ie_c_cxx_deprecated_no_errors "${ie_c_cxx_deprecated_no_errors} /wd4146 /wd4703")
 endif()
 else()
 if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
@@ -56,6 +60,8 @@ macro(ie_deprecated_no_errors)
 endif()
 endif()

+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${ie_c_cxx_deprecated_no_errors}")
+set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${ie_c_cxx_deprecated_no_errors}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ie_c_cxx_deprecated_no_errors}")
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ie_c_cxx_deprecated_no_errors}")
 endmacro()
@@ -2,49 +2,47 @@
 # SPDX-License-Identifier: Apache-2.0
 #

-if (CMAKE_BUILD_TYPE STREQUAL "Release")
-    if(UNIX)
-        set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -Wformat -Wformat-security")
-        if (NOT ENABLE_SANITIZER)
-            # ASan does not support fortification https://github.com/google/sanitizers/issues/247
-            set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -D_FORTIFY_SOURCE=2")
-        endif()
-        if(NOT APPLE)
-            set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -pie")
-        endif()
-
-        if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-            set(IE_LINKER_FLAGS "${IE_LINKER_FLAGS} -z noexecstack -z relro -z now")
-            if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9)
-                set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-all")
-            else()
-                set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-strong")
-            endif()
-            if (NOT ENABLE_SANITIZER)
-                set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -s")
-            endif()
-        elseif(OV_COMPILER_IS_CLANG)
-            set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-all")
-        elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
-            if (NOT ENABLE_SANITIZER)
-                set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -Wl,--strip-all")
-            endif()
-            set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-strong")
-            set(IE_LINKER_FLAGS "${IE_LINKER_FLAGS} -z noexecstack -z relro -z now")
-        endif()
-    else()
-        if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
-            set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} /sdl")
-        endif()
-        set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} /guard:cf")
-        if(ENABLE_INTEGRITYCHECK)
-            set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /INTEGRITYCHECK")
-        endif()
-    endif()
-
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${IE_C_CXX_FLAGS}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${IE_C_CXX_FLAGS}")
-    set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} ${IE_LINKER_FLAGS}")
-    set(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} ${IE_LINKER_FLAGS}")
-    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} ${IE_LINKER_FLAGS}")
-endif()
+if(UNIX)
+    set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -Wformat -Wformat-security")
+    if (NOT ENABLE_SANITIZER)
+        # ASan does not support fortification https://github.com/google/sanitizers/issues/247
+        set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -D_FORTIFY_SOURCE=2")
+    endif()
+    if(NOT APPLE)
+        set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -pie")
+    endif()
+
+    if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+        set(IE_LINKER_FLAGS "${IE_LINKER_FLAGS} -z noexecstack -z relro -z now")
+        if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9)
+            set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-all")
+        else()
+            set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-strong")
+        endif()
+        if (NOT ENABLE_SANITIZER)
+            set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -s")
+        endif()
+    elseif(OV_COMPILER_IS_CLANG)
+        set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-all")
+    elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+        if (NOT ENABLE_SANITIZER)
+            set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -Wl,--strip-all")
+        endif()
+        set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-strong")
+        set(IE_LINKER_FLAGS "${IE_LINKER_FLAGS} -z noexecstack -z relro -z now")
+    endif()
+else()
+    if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+        set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} /sdl")
+    endif()
+    set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} /guard:cf")
+    if(ENABLE_INTEGRITYCHECK)
+        set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /INTEGRITYCHECK")
+    endif()
+endif()
+
+set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${IE_C_CXX_FLAGS}")
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${IE_C_CXX_FLAGS}")
+set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} ${IE_LINKER_FLAGS}")
+set(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} ${IE_LINKER_FLAGS}")
+set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} ${IE_LINKER_FLAGS}")
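Note: the net effect of this hunk is that the hardening flags no longer sit behind if (CMAKE_BUILD_TYPE STREQUAL "Release"), a guard that never fires under multi-config generators where CMAKE_BUILD_TYPE is empty; appending to the *_RELEASE flag variables applies them whenever the Release configuration is built. A hedged before/after sketch:

    # Before: no effect with Visual Studio, Xcode or "Ninja Multi-Config".
    if(CMAKE_BUILD_TYPE STREQUAL "Release")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_FORTIFY_SOURCE=2")
    endif()
    # After: honored by every generator when the Release configuration is built.
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -D_FORTIFY_SOURCE=2")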
@@ -11,10 +11,10 @@ ie_dependent_option (ENABLE_LTO "Enable Link Time Optimization" OFF "LINUX;NOT C

 ie_option (OS_FOLDER "create OS dedicated folder in output" OFF)

-if(UNIX)
-    ie_option(USE_BUILD_TYPE_SUBFOLDER "Create dedicated sub-folder per build type for output binaries" ON)
-else()
+if(OV_GENERATOR_MULTI_CONFIG)
     ie_option(USE_BUILD_TYPE_SUBFOLDER "Create dedicated sub-folder per build type for output binaries" OFF)
+else()
+    ie_option(USE_BUILD_TYPE_SUBFOLDER "Create dedicated sub-folder per build type for output binaries" ON)
 endif()

 # FIXME: ARM cross-compiler generates several "false positive" warnings regarding __builtin_memcpy buffer overflow
cmake/developer_package/frontends/create_frontends_hpp.cmake: 34 added lines (new file)
@@ -0,0 +1,34 @@
+# Copyright (C) 2018-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+foreach(var OV_FRONTENDS_HPP_HEADER_IN OV_FRONTENDS_HPP_HEADER FRONTEND_NAMES)
+    if(NOT DEFINED ${var})
+        message(FATAL_ERROR "${var} is required, but not defined")
+    endif()
+endforeach()
+
+# configure variables
+
+set(OV_FRONTEND_DECLARATIONS "")
+set(OV_FRONTEND_MAP_DEFINITION "    FrontendsStaticRegistry registry = {")
+
+foreach(frontend IN LISTS FRONTEND_NAMES)
+    # common
+    set(_OV_FRONTEND_DATA_FUNC "GetFrontEndData${frontend}")
+    set(_OV_VERSION_FUNC "GetAPIVersion${frontend}")
+
+    # declarations
+    set(OV_FRONTEND_DECLARATIONS "${OV_FRONTEND_DECLARATIONS}
+ov::frontend::FrontEndVersion ${_OV_VERSION_FUNC}();
+void* ${_OV_FRONTEND_DATA_FUNC}();")
+
+    set(OV_FRONTEND_MAP_DEFINITION "${OV_FRONTEND_MAP_DEFINITION}
+        { Value { ${_OV_FRONTEND_DATA_FUNC}, ${_OV_VERSION_FUNC} } },")
+endforeach()
+
+set(OV_FRONTEND_MAP_DEFINITION "${OV_FRONTEND_MAP_DEFINITION}
+    };
+    return registry;")
+
+configure_file("${OV_FRONTENDS_HPP_HEADER_IN}" "${OV_FRONTENDS_HPP_HEADER}" @ONLY)
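Note: the script above is meant to run in CMake script mode (-P) with its three required variables supplied via -D. A hedged invocation sketch (paths and frontend names are illustrative, not from the commit):

    # Hypothetical stand-alone invocation of the generator script:
    execute_process(COMMAND "${CMAKE_COMMAND}"
        -D "OV_FRONTENDS_HPP_HEADER_IN=ov_frontends.hpp.in"
        -D "OV_FRONTENDS_HPP_HEADER=ov_frontends.hpp"
        -D "FRONTEND_NAMES=ir;onnx;paddlepaddle"
        -P "create_frontends_hpp.cmake")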
cmake/developer_package/frontends/frontends.cmake: 241 added lines (new file)
@@ -0,0 +1,241 @@
+# Copyright (C) 2018-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+set(FRONTEND_INSTALL_INCLUDE "runtime/include/ngraph/frontend")
+set(FRONTEND_NAME_SUFFIX "_ov_frontend")
+
+set(FRONTEND_NAMES "" CACHE INTERNAL "")
+
+if(NOT TARGET ov_frontends)
+    add_custom_target(ov_frontends)
+endif()
+
+#
+# ov_target_link_frontends(<TARGET_NAME>)
+#
+function(ov_target_link_frontends TARGET_NAME)
+    if(BUILD_SHARED_LIBS)
+        return()
+    endif()
+
+    foreach(name IN LISTS FRONTEND_NAMES)
+        set(frontend_target_name "${name}${FRONTEND_NAME_SUFFIX}")
+        target_link_libraries(${TARGET_NAME} PRIVATE ${frontend_target_name})
+    endforeach()
+endfunction()
+
+#
+# ov_generate_frontends_hpp()
+#
+function(ov_generate_frontends_hpp)
+    if(BUILD_SHARED_LIBS)
+        return()
+    endif()
+
+    # add frontends to libraries including ov_frontends.hpp
+    ov_target_link_frontends(frontend_common)
+
+    set(ov_frontends_hpp "${CMAKE_BINARY_DIR}/src/frontends/common/src/ov_frontends.hpp")
+    set(frontends_hpp_in "${IEDevScripts_DIR}/frontends/ov_frontends.hpp.in")
+
+    add_custom_command(OUTPUT "${ov_frontends_hpp}"
+                       COMMAND
+                        "${CMAKE_COMMAND}"
+                        -D "OV_FRONTENDS_HPP_HEADER_IN=${frontends_hpp_in}"
+                        -D "OV_FRONTENDS_HPP_HEADER=${ov_frontends_hpp}"
+                        -D "FRONTEND_NAMES=${FRONTEND_NAMES}"
+                        -P "${IEDevScripts_DIR}/frontends/create_frontends_hpp.cmake"
+                       DEPENDS
+                        "${frontends_hpp_in}"
+                        "${IEDevScripts_DIR}/frontends/create_frontends_hpp.cmake"
+                       COMMENT
+                        "Generate ov_frontends.hpp for static build"
+                       VERBATIM)
+
+    # for some reason dependency on source files does not work
+    # so, we have to use explicit target and make it dependency for frontend_common
+    add_custom_target(_ov_frontends_hpp DEPENDS ${ov_frontends_hpp})
+    add_dependencies(frontend_common _ov_frontends_hpp)
+
+    # add dependency for object files
+    get_target_property(sources frontend_common::static SOURCES)
+    foreach(source IN LISTS sources)
+        if("${source}" MATCHES "\\$\\<TARGET_OBJECTS\\:([A-Za-z0-9_]*)\\>")
+            # object library
+            set(obj_library ${CMAKE_MATCH_1})
+            get_target_property(obj_sources ${obj_library} SOURCES)
+            list(APPEND all_sources ${obj_sources})
+        else()
+            # usual source
+            list(APPEND all_sources ${source})
+        endif()
+    endforeach()
+
+    # add dependency on header file generation for all inference_engine source files
+    set_source_files_properties(${all_sources} PROPERTIES OBJECT_DEPENDS ${ov_frontends_hpp})
+endfunction()
+
+unset(protobuf_lite_installed CACHE)
+unset(protobuf_installed CACHE)
+
+#
+# ov_add_frontend(NAME <IR|ONNX|...>
+#                 FILEDESCRIPTION <description>
+#                 [LINKABLE_FRONTEND]
+#                 [SKIP_INSTALL]
+#                 [PROTOBUF_LITE]
+#                 [LINK_LIBRARIES <lib1 lib2 ...>])
+#
+macro(ov_add_frontend)
+    set(options LINKABLE_FRONTEND PROTOBUF_LITE SKIP_NCC_STYLE SKIP_INSTALL)
+    set(oneValueArgs NAME FILEDESCRIPTION)
+    set(multiValueArgs LINK_LIBRARIES PROTO_FILES)
+    cmake_parse_arguments(OV_FRONTEND "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    foreach(prop NAME FILEDESCRIPTION)
+        if(NOT DEFINED OV_FRONTEND_${prop})
+            message(FATAL_ERROR "Frontend ${prop} property is not defined")
+        endif()
+    endforeach()
+
+    set(TARGET_NAME "${OV_FRONTEND_NAME}${FRONTEND_NAME_SUFFIX}")
+
+    list(APPEND FRONTEND_NAMES ${OV_FRONTEND_NAME})
+    set(FRONTEND_NAMES "${FRONTEND_NAMES}" CACHE INTERNAL "" FORCE)
+
+    file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp)
+    file(GLOB_RECURSE LIBRARY_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/src/*.hpp)
+    file(GLOB_RECURSE LIBRARY_PUBLIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp)
+
+    set(${TARGET_NAME}_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
+
+    # Create named folders for the sources within the .vcproj
+    # Empty name lists them directly under the .vcproj
+
+    source_group("src" FILES ${LIBRARY_SRC})
+    source_group("include" FILES ${LIBRARY_HEADERS})
+    source_group("public include" FILES ${LIBRARY_PUBLIC_HEADERS})
+
+    # Generate protobuf file on build time for each '.proto' file in src/proto
+    file(GLOB proto_files ${CMAKE_CURRENT_SOURCE_DIR}/src/proto/*.proto)
+
+    foreach(INFILE IN LISTS proto_files)
+        get_filename_component(FILE_DIR ${INFILE} DIRECTORY)
+        get_filename_component(FILE_WE ${INFILE} NAME_WE)
+        set(OUTPUT_PB_SRC ${CMAKE_CURRENT_BINARY_DIR}/${FILE_WE}.pb.cc)
+        set(OUTPUT_PB_HEADER ${CMAKE_CURRENT_BINARY_DIR}/${FILE_WE}.pb.h)
+        set(GENERATED_PROTO ${INFILE})
+        add_custom_command(
+                OUTPUT "${OUTPUT_PB_SRC}" "${OUTPUT_PB_HEADER}"
+                COMMAND ${PROTOC_EXECUTABLE} ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${FILE_DIR} ${FILE_WE}.proto
+                DEPENDS ${PROTOC_EXECUTABLE} ${GENERATED_PROTO}
+                COMMENT "Running C++ protocol buffer compiler (${PROTOC_EXECUTABLE}) on ${GENERATED_PROTO}"
+                VERBATIM
+                COMMAND_EXPAND_LISTS)
+        list(APPEND PROTO_SRCS "${OUTPUT_PB_SRC}")
+        list(APPEND PROTO_HDRS "${OUTPUT_PB_HEADER}")
+    endforeach()
+
+    # Disable all warnings for generated code
+    set_source_files_properties(${PROTO_SRCS} ${PROTO_HDRS} PROPERTIES COMPILE_OPTIONS -w GENERATED TRUE)
+
+    # Create library
+    add_library(${TARGET_NAME} ${LIBRARY_SRC} ${LIBRARY_HEADERS} ${LIBRARY_PUBLIC_HEADERS} ${PROTO_SRCS} ${PROTO_HDRS})
+
+    if(OV_FRONTEND_LINKABLE_FRONTEND)
+        # create beautiful alias
+        add_library(openvino::frontend::${OV_FRONTEND_NAME} ALIAS ${TARGET_NAME})
+    endif()
+
+    if(NOT BUILD_SHARED_LIBS)
+        # override default function names
+        target_compile_definitions(${TARGET_NAME} PRIVATE
+            "-DGetFrontEndData=GetFrontEndData${OV_FRONTEND_NAME}"
+            "-DGetAPIVersion=GetAPIVersion${OV_FRONTEND_NAME}")
+    endif()
+
+    if(OV_FRONTEND_SKIP_NCC_STYLE)
+        # frontend's CMakeLists.txt must define its own custom 'ov_ncc_naming_style' step
+    else()
+        ov_ncc_naming_style(FOR_TARGET ${TARGET_NAME}
+                            INCLUDE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include"
+                            ADDITIONAL_INCLUDE_DIRECTORIES
+                                $<TARGET_PROPERTY:frontend_common::static,INTERFACE_INCLUDE_DIRECTORIES>)
+    endif()
+
+    target_include_directories(${TARGET_NAME}
+            PUBLIC
+                $<BUILD_INTERFACE:${${TARGET_NAME}_INCLUDE_DIR}>
+            PRIVATE
+                ${CMAKE_CURRENT_SOURCE_DIR}/src
+                ${CMAKE_CURRENT_BINARY_DIR})
+
+    ie_add_vs_version_file(NAME ${TARGET_NAME}
+                           FILEDESCRIPTION ${OV_FRONTEND_FILEDESCRIPTION})
+
+    ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME})
+
+    target_link_libraries(${TARGET_NAME} PRIVATE frontend_common::static ${OV_FRONTEND_LINK_LIBRARIES})
+
+    # WA for TF frontends which always requires protobuf (not protobuf-lite)
+    # if TF FE is built in static mode, use protobuf for all other FEs
+    if(FORCE_FRONTENDS_USE_PROTOBUF)
+        set(OV_FRONTEND_PROTOBUF_LITE OFF)
+    endif()
+
+    if(proto_files)
+        if(OV_FRONTEND_PROTOBUF_LITE)
+            if(NOT protobuf_lite_installed)
+                ov_install_static_lib(${Protobuf_LITE_LIBRARIES} ngraph)
+                set(protobuf_lite_installed ON CACHE INTERNAL "" FORCE)
+            endif()
+            link_system_libraries(${TARGET_NAME} PRIVATE ${Protobuf_LITE_LIBRARIES})
+        else()
+            if(NOT protobuf_installed)
+                ov_install_static_lib(${Protobuf_LIBRARIES} ngraph)
+                set(protobuf_installed ON CACHE INTERNAL "" FORCE)
+            endif()
+            link_system_libraries(${TARGET_NAME} PRIVATE ${Protobuf_LIBRARIES})
+        endif()
+
+        # protobuf generated code emits -Wsuggest-override error
+        if(SUGGEST_OVERRIDE_SUPPORTED)
+            target_compile_options(${TARGET_NAME} PRIVATE -Wno-suggest-override)
+        endif()
+    endif()
+
+    add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}
+                            EXCLUDE_PATTERNS ${PROTO_SRCS} ${PROTO_HDRS})
+
+    add_dependencies(ov_frontends ${TARGET_NAME})
+
+    if(NOT OV_FRONTEND_SKIP_INSTALL)
+        if(BUILD_SHARED_LIBS)
+            if(OV_FRONTEND_LINKABLE_FRONTEND)
+                set(export_set EXPORT OpenVINOTargets)
+            endif()
+            install(TARGETS ${TARGET_NAME} ${export_set}
+                    RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT ngraph
+                    ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT ngraph
+                    LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT ngraph)
+        else()
+            ov_install_static_lib(${TARGET_NAME} ngraph)
+        endif()
+
+        if(OV_FRONTEND_LINKABLE_FRONTEND)
+            # install -dev part
+            install(DIRECTORY ${${TARGET_NAME}_INCLUDE_DIR}/${OV_FRONTEND_NAME}_frontend
+                    DESTINATION ${FRONTEND_INSTALL_INCLUDE}
+                    COMPONENT ngraph_dev
+                    FILES_MATCHING PATTERN "*.hpp")
+
+            set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME frontend::${OV_FRONTEND_NAME})
+            export(TARGETS ${TARGET_NAME} NAMESPACE openvino::
+                   APPEND FILE "${CMAKE_BINARY_DIR}/OpenVINOTargets.cmake")
+        endif()
+    else()
+        # skipped frontend has to be installed in static libraries case
+        ov_install_static_lib(${TARGET_NAME} ngraph)
+    endif()
+endmacro()
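Note: a hedged usage sketch for the ov_add_frontend macro defined above; the frontend name, description and linked library are illustrative, not taken from this commit:

    # Hypothetical registration from a frontend's own CMakeLists.txt:
    ov_add_frontend(NAME onnx
                    LINKABLE_FRONTEND
                    PROTOBUF_LITE
                    FILEDESCRIPTION "FrontEnd to load and convert ONNX models"
                    LINK_LIBRARIES onnx_common)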
cmake/developer_package/frontends/ov_frontends.hpp.in: 27 added lines (new file)
@@ -0,0 +1,27 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "common/frontend.hpp"
+
+@OV_FRONTEND_DECLARATIONS@
+
+namespace {
+
+using GetFrontEndDataFunc = void*();
+using GetAPIVersionFunc = ov::frontend::FrontEndVersion();
+
+struct Value {
+    GetFrontEndDataFunc* m_dataFunc;
+    GetAPIVersionFunc* m_versionFunc;
+};
+
+using FrontendsStaticRegistry = std::vector<Value>;
+
+const FrontendsStaticRegistry getStaticFrontendsRegistry() {
+@OV_FRONTEND_MAP_DEFINITION@
+}
+
+}  // namespace
@@ -12,7 +12,7 @@ TemplateNonTypeParameter: '^\w*$'
 ClassTemplate: '^([A-Z][\w]+|element_type_traits)$'
 TemplateTypeParameter: '^\w*$'
 ParameterName: '^\w*$'
-FunctionTemplate: '^(operator.+|\w+)$'
+FunctionTemplate: '^(operator.+|[\w]+|Impl<.*>)$'
 TypeAliasName: '^\w+$'
 VariableReference: '^\w+$'
@@ -25,7 +25,7 @@ TypedefName: '^.*$'
 CxxDynamicCastExpression: '^.*$'

 # not needed values
-ClassTemplatePartialSpecialization: 'XXXX'
+ClassTemplatePartialSpecialization: '^.*$'
 ConversionFunction: '^.*$'
 UsingDirective: 'XXXX'
 ClassAccessSpecifier: '^.*$' # looks like can be fixed
@@ -64,9 +64,9 @@ GenericSelectionExpression: 'XXXX'
 GnuNullExpression: 'XXXX'
 CxxStaticCastExpression: '^.*$'
 CxxReinterpretCastExpression: '^.*$'
-CxxConstCastExpression: 'XXXX'
+CxxConstCastExpression: '^.*$'
 CxxFunctionalCastExpression: '^.*$'
-CxxTypeidExpression: 'XXXX'
+CxxTypeidExpression: '^.*$'
 CxxBoolLiteralExpression: '^.*$'
 CxxNullPointerLiteralExpression: '^.*$'
 CxxThisExpression: '^.*$'
@@ -75,8 +75,6 @@ function(ie_add_plugin)
 target_compile_definitions(${IE_PLUGIN_NAME} PRIVATE
     IE_CREATE_EXTENSION=CreateExtensionShared${IE_PLUGIN_DEVICE_NAME})
 endif()
-# install static plugins
-ov_install_static_lib(${IE_PLUGIN_NAME} core)
 endif()

 ie_add_vs_version_file(NAME ${IE_PLUGIN_NAME}
@@ -137,13 +135,17 @@ function(ie_add_plugin)
 endif()

 # install rules
-if(NOT IE_PLUGIN_SKIP_INSTALL)
+if(NOT IE_PLUGIN_SKIP_INSTALL OR NOT BUILD_SHARED_LIBS)
     string(TOLOWER "${IE_PLUGIN_DEVICE_NAME}" install_component)
     ie_cpack_add_component(${install_component} REQUIRED DEPENDS core)

-    install(TARGETS ${IE_PLUGIN_NAME}
-            LIBRARY DESTINATION ${IE_CPACK_RUNTIME_PATH}
-            COMPONENT ${install_component})
+    if(BUILD_SHARED_LIBS)
+        install(TARGETS ${IE_PLUGIN_NAME}
+                LIBRARY DESTINATION ${IE_CPACK_RUNTIME_PATH}
+                COMPONENT ${install_component})
+    else()
+        ov_install_static_lib(${IE_PLUGIN_NAME} ${install_component})
+    endif()
 endif()
 endif()
@@ -244,12 +246,18 @@ macro(ie_register_plugins_dynamic)
                       VERBATIM)
 endmacro()

 #
 # ie_register_plugins()
 #
+macro(ie_register_plugins)
+    if(BUILD_SHARED_LIBS)
+        ie_register_plugins_dynamic(${ARGN})
+    endif()
+endmacro()
+
 #
 # ie_target_link_plugins(<TARGET_NAME>)
 #
 function(ie_target_link_plugins TARGET_NAME)
     if(BUILD_SHARED_LIBS)
         return()
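Note: the new ie_register_plugins wrapper turns plugin XML registration into a no-op for static builds, where plugins are instead linked directly through ie_target_link_plugins. A hedged call-site sketch (argument names assumed from the dynamic variant, which this hunk does not show):

    # Hypothetical call site; only generates plugins.xml when BUILD_SHARED_LIBS=ON.
    ie_register_plugins(MAIN_TARGET inference_engine
                        POSSIBLE_PLUGINS MultiDevicePlugin HeteroPlugin)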
@@ -311,7 +319,7 @@ function(ie_generate_plugins_hpp)
 ie_target_link_plugins(inference_engine_s)
 endif()

-set(ie_plugins_hpp "${CMAKE_BINARY_DIR}/inference-engine/src/inference_engine/ie_plugins.hpp")
+set(ie_plugins_hpp "${CMAKE_BINARY_DIR}/src/inference/ie_plugins.hpp")
 set(plugins_hpp_in "${IEDevScripts_DIR}/plugins/plugins.hpp.in")

 add_custom_command(OUTPUT "${ie_plugins_hpp}"
@@ -332,8 +340,8 @@ function(ie_generate_plugins_hpp)

 # for some reason dependency on source files does not work
 # so, we have to use explicit target and make it dependency for inference_engine
-add_custom_target(ie_generate_hpp DEPENDS ${ie_plugins_hpp})
-add_dependencies(inference_engine ie_generate_hpp)
+add_custom_target(_ie_plugins_hpp DEPENDS ${ie_plugins_hpp})
+add_dependencies(inference_engine _ie_plugins_hpp)

 # add dependency for object files
 get_target_property(sources inference_engine SOURCES)
@@ -61,3 +61,5 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
 else()
     set(OV_COMPILER_IS_CLANG OFF)
 endif()
+
+get_property(OV_GENERATOR_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
@@ -44,12 +44,12 @@ macro(ie_parse_ci_build_number)
 return()
 endif()

-set(ie_version_hpp "${OpenVINO_SOURCE_DIR}/inference-engine/src/inference_engine/include/ie/ie_version.hpp")
+set(ie_version_hpp "${OpenVINO_SOURCE_DIR}/src/inference/include/ie/ie_version.hpp")
 if(NOT EXISTS ${ie_version_hpp})
     message(FATAL_ERROR "File ie_version.hpp with IE_VERSION definitions is not found")
 endif()

-set(ov_version_hpp "${OpenVINO_SOURCE_DIR}/ngraph/core/include/openvino/core/version.hpp")
+set(ov_version_hpp "${OpenVINO_SOURCE_DIR}/src/core/include/openvino/core/version.hpp")
 if(NOT EXISTS ${ov_version_hpp})
     message(FATAL_ERROR "File openvino/core/version.hpp with OPENVINO_VERSION definitions is not found")
 endif()
@@ -6,7 +6,7 @@ function(ie_generate_dev_package_config)
 # dummy check that OpenCV is here
 find_package(OpenCV QUIET)

-set(all_dev_targets gflags ie_libraries)
+set(all_dev_targets gflags ov_runtime_libraries)
 foreach(component IN LISTS openvino_export_components)
     # export all targets with prefix and use them during extra modules build
     export(TARGETS ${${component}} NAMESPACE IE::
@@ -102,7 +102,7 @@ openvino_developer_export_targets(COMPONENT ngraph TARGETS ngraph_backend interp
 ie_generate_dev_package_config()

 # extra modules must be registered after inference_engine library
-# and all other IE common libraries (ie_libraries) are created
+# and all other IE common libraries (ov_runtime_libraries) are created
 # because 'register_extra_modules' creates fake InferenceEngineDeveloperPackageConfig.cmake
 # with all imported developer targets
 register_extra_modules()
@@ -163,13 +163,19 @@ ie_dependent_option(NGRAPH_PDPD_FRONTEND_ENABLE "Enable PaddlePaddle FrontEnd" O
 ie_option(NGRAPH_IR_FRONTEND_ENABLE "Enable IR FrontEnd" ON)
 ie_dependent_option(NGRAPH_TF_FRONTEND_ENABLE "Enable TensorFlow FrontEnd" ON "protoc_available" OFF)
 ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF
-    "NGRAPH_ONNX_FRONTEND_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE OR NGRAPH_TF_FRONTEND_ENABLE" OFF)
+    "NGRAPH_ONNX_FRONTEND_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE OR NGRAPH_TF_FRONTEND_ENABLE;BUILD_SHARED_LIBS" OFF)
 ie_dependent_option(NGRAPH_UNIT_TEST_ENABLE "Enables ngraph unit tests" ON "ENABLE_TESTS;NOT ANDROID" OFF)
 ie_dependent_option(NGRAPH_UNIT_TEST_BACKENDS_ENABLE "Control the building of unit tests using backends" ON
     "NGRAPH_UNIT_TEST_ENABLE" OFF)
 ie_option(OPENVINO_DEBUG_ENABLE "Enable output for OPENVINO_DEBUG statements" OFF)
 ie_option(ENABLE_REQUIREMENTS_INSTALL "Dynamic dependencies install" ON)

+if(NOT BUILD_SHARED_LIBS AND NGRAPH_TF_FRONTEND_ENABLE)
+    set(FORCE_FRONTENDS_USE_PROTOBUF ON)
+else()
+    set(FORCE_FRONTENDS_USE_PROTOBUF OFF)
+endif()
+
 # WA for ngraph python build on Windows debug
 list(REMOVE_ITEM IE_OPTIONS NGRAPH_UNIT_TEST_ENABLE NGRAPH_UNIT_TEST_BACKENDS_ENABLE)
@@ -45,7 +45,7 @@ find_dependency(InferenceEngine
                 NO_DEFAULT_PATH)

 find_dependency(ngraph
-                PATHS "${CMAKE_CURRENT_LIST_DIR}/ngraph"
+                PATHS "${CMAKE_CURRENT_LIST_DIR}/src/core"
                 NO_CMAKE_FIND_ROOT_PATH
                 NO_DEFAULT_PATH)
@@ -5,7 +5,7 @@
 set_property(GLOBAL PROPERTY JOB_POOLS four_jobs=4)

 function(ov_model_convert SRC DST OUT)
-    set(onnx_gen_script ${OpenVINO_SOURCE_DIR}/ngraph/test/models/onnx/onnx_prototxt_converter.py)
+    set(onnx_gen_script ${OpenVINO_SOURCE_DIR}/src/core/tests/models/onnx/onnx_prototxt_converter.py)

     file(GLOB_RECURSE prototxt_models RELATIVE "${SRC}" "${SRC}/*.prototxt")
     file(GLOB_RECURSE xml_models RELATIVE "${SRC}" "${SRC}/*.xml")
@@ -62,8 +62,8 @@ function(ov_model_convert SRC DST OUT)
     set(${OUT} ${files} PARENT_SCOPE)
 endfunction()

-ov_model_convert("${CMAKE_CURRENT_SOURCE_DIR}/ngraph/test"
-                 "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/ngraph"
+ov_model_convert("${CMAKE_CURRENT_SOURCE_DIR}/src/core/tests"
+                 "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/core"
                  onnx_out_files)

 set(rel_path "inference-engine/tests/functional/plugin/shared/models")
@@ -117,7 +117,7 @@ if(ENABLE_TESTS)
     list(APPEND args --use-feature=2020-resolver)
 endif()

-set(reqs "${OpenVINO_SOURCE_DIR}/ngraph/test/requirements_test_onnx.txt")
+set(reqs "${OpenVINO_SOURCE_DIR}/src/core/tests/requirements_test_onnx.txt")
 add_custom_target(test_pip_prerequsites ALL
                   "${PYTHON_EXECUTABLE}" -m pip install ${args} -r ${reqs}
                   COMMENT "Install requirements_test.txt"
@@ -65,13 +65,14 @@ function(build_docs)
 set(DOCS_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}")
 set(DOXYGEN_DIR "${OpenVINO_SOURCE_DIR}/docs/doxygen")
 set(IE_SOURCE_DIR "${OpenVINO_SOURCE_DIR}/inference-engine")
+set(OV_INFERENCE_DIR "${OpenVINO_SOURCE_DIR}/src/inference")
 set(PYTHON_API_IN "${IE_SOURCE_DIR}/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx")
 set(PYTHON_API_OUT "${DOCS_BUILD_DIR}/python_api/ie_api.pyx")
-set(C_API "${IE_SOURCE_DIR}/ie_bridges/c/include")
+set(C_API "${OpenVINO_SOURCE_DIR}/bindings/c/include")
 set(PLUGIN_API_DIR "${DOCS_BUILD_DIR}/IE_PLUGIN_DG")
-set(NGRAPH_DIR "${OpenVINO_SOURCE_DIR}/ngraph")
-set(NGRAPH_PY_DIR "${OpenVINO_SOURCE_DIR}/runtime/bindings/python/src/compatibility/ngraph/")
-set(NGRAPH_CPP_DIR "${NGRAPH_DIR}/core/include/" "${NGRAPH_DIR}/frontend/onnx_import/include")
+set(CORE_DIR "${OpenVINO_SOURCE_DIR}/src/core")
+set(FRONTENDS_DIR "${OpenVINO_SOURCE_DIR}/src/frontends")
+set(NGRAPH_PY_DIR "${OpenVINO_SOURCE_DIR}/src/bindings/python/src/compatibility/ngraph/")

 # Preprocessing scripts
 set(DOXY_MD_FILTER "${DOXYGEN_DIR}/doxy_md_filter.py")
|
||||
"${OpenVINO_SOURCE_DIR}/inference-engine/*.png"
|
||||
"${OpenVINO_SOURCE_DIR}/inference-engine/*.gif"
|
||||
"${OpenVINO_SOURCE_DIR}/inference-engine/*.jpg"
|
||||
"${OpenVINO_SOURCE_DIR}/runtime/*.md"
|
||||
"${OpenVINO_SOURCE_DIR}/runtime/*.png"
|
||||
"${OpenVINO_SOURCE_DIR}/runtime/*.gif"
|
||||
"${OpenVINO_SOURCE_DIR}/runtime/*.jpg"
|
||||
"${OpenVINO_SOURCE_DIR}/src/*.md"
|
||||
"${OpenVINO_SOURCE_DIR}/src/*.png"
|
||||
"${OpenVINO_SOURCE_DIR}/src/*.gif"
|
||||
"${OpenVINO_SOURCE_DIR}/src/*.jpg"
|
||||
"${OpenVINO_SOURCE_DIR}/samples/*.md"
|
||||
"${OpenVINO_SOURCE_DIR}/samples/*.png"
|
||||
"${OpenVINO_SOURCE_DIR}/samples/*.gif"
|
||||
|
@@ -70,7 +70,7 @@ The table below shows the plugin libraries and additional dependencies for Linux
 | MYRIAD | `libmyriadPlugin.so` | `libusb.so`, | `myriadPlugin.dll` | `usb.dll` | `libmyriadPlugin.so` | `libusb.dylib` |
 | HDDL | `libHDDLPlugin.so` | `libbsl.so`, `libhddlapi.so`, `libmvnc-hddl.so` | `HDDLPlugin.dll` | `bsl.dll`, `hddlapi.dll`, `json-c.dll`, `libcrypto-1_1-x64.dll`, `libssl-1_1-x64.dll`, `mvnc-hddl.dll` | Is not supported | - |
 | GNA | `libGNAPlugin.so` | `libgna.so`, | `GNAPlugin.dll` | `gna.dll` | Is not supported | - |
-| HETERO | `libHeteroPlugin.so` | Same as for selected plugins | `HeteroPlugin.dll` | Same as for selected plugins | `libHeteroPlugin.so` | Same as for selected plugins |
+| HETERO | `libov_hetero_plugin.so` | Same as for selected plugins | `ov_hetero_plugin.dll` | Same as for selected plugins | `libov_hetero_plugin.so` | Same as for selected plugins |
 | MULTI | `libMultiDevicePlugin.so` | Same as for selected plugins | `MultiDevicePlugin.dll` | Same as for selected plugins | `libMultiDevicePlugin.so` | Same as for selected plugins |

 > **NOTE**: All plugin libraries also depend on core Inference Engine libraries.
@@ -7,7 +7,7 @@ The OpenVINO™ Python\* package available in the `<INSTALL_DIR>/python/python3.

 The OpenVINO™ Python\* package includes the following sub-packages:

-- [openvino.inference_engine](../../runtime/bindings/python/docs/api_overview.md) - Python\* wrapper on OpenVINO™ Inference Engine.
+- [openvino.inference_engine](../../src/bindings/python/docs/api_overview.md) - Python\* wrapper on OpenVINO™ Inference Engine.
 - `openvino.tools.accuracy_checker` - Measure accuracy.
 - `openvino.tools.benchmark` - Measure latency and throughput.
@@ -75,7 +75,7 @@ The table below shows the plugin libraries and additional dependencies for Linux
 | MYRIAD | `libmyriadPlugin.so` | `libusb.so`, | `myriadPlugin.dll` | `usb.dll` | `libmyriadPlugin.so` | `libusb.dylib` |
 | HDDL | `libHDDLPlugin.so` | `libbsl.so`, `libhddlapi.so`, `libmvnc-hddl.so` | `HDDLPlugin.dll` | `bsl.dll`, `hddlapi.dll`, `json-c.dll`, `libcrypto-1_1-x64.dll`, `libssl-1_1-x64.dll`, `mvnc-hddl.dll` | Is not supported | - |
 | GNA | `libGNAPlugin.so` | `libgna.so`, | `GNAPlugin.dll` | `gna.dll` | Is not supported | - |
-| HETERO | `libHeteroPlugin.so` | Same as for selected plugins | `HeteroPlugin.dll` | Same as for selected plugins | `libHeteroPlugin.so` | Same as for selected plugins |
+| HETERO | `libov_hetero_plugin.so` | Same as for selected plugins | `ov_hetero_plugin.dll` | Same as for selected plugins | `libov_hetero_plugin.so` | Same as for selected plugins |
 | MULTI | `libMultiDevicePlugin.so` | Same as for selected plugins | `MultiDevicePlugin.dll` | Same as for selected plugins | `libMultiDevicePlugin.so` | Same as for selected plugins |

 > **NOTE**: All plugin libraries also depend on core Inference Engine libraries.
@@ -781,9 +781,9 @@ WARN_LOGFILE =
 # Note: If this tag is empty the current directory is searched.

 INPUT = . \
-        ../../inference-engine/src/transformations/include/transformations \
-        ../../inference-engine/src/plugin_api \
-        ../../openvino/itt/include/openvino
+        ../../src/common/transformations/include/transformations \
+        ../../src/inference/dev_api \
+        ../../src/common/itt/include/openvino

 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
@@ -274,4 +274,4 @@ exec_net = ie.load_network(network=net, device_name="CPU")
 result_ie = exec_net.infer(input_data)
 ```

-For more information about Python API, refer to [Inference Engine Python API Overview](../../../../../runtime/bindings/python/docs/api_overview.md).
+For more information about Python API, refer to [Inference Engine Python API Overview](../../../../../src/bindings/python/docs/api_overview.md).
@@ -1,6 +1,5 @@
 openvino/samples/cpp/hello_reshape_ssd/README.md
 openvino/docs/index.md
-inference-engine/include/ie_icnn_network.hpp
 openvino/docs/get_started/get_started_dl_workbench.md
 openvino/docs/get_started/get_started_linux.md
 openvino/docs/get_started/get_started_raspbian.md
@@ -10,29 +9,13 @@ openvino/docs/HOWTO/Custom_Layers_Guide.md
 openvino/docs/install_guides/deployment-manager-tool.md
 openvino/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md
 openvino/docs/ovsa/ovsa_get_started.md
-openvino/inference-engine/ie_bridges/c/docs/api_overview.md
-inference-engine/include/cpp/ie_infer_request.hpp
-inference-engine/include/ie_parallel.hpp
-inference-engine/include/gpu/gpu_context_api_ocl.hpp
-inference-engine/include/gpu/gpu_context_api_va.hpp
-inference-engine/include/ie_plugin_config.hpp
-inference-engine/include/ie_unicode.hpp
-inference-engine/include/vpu/myriad_config.hpp
-inference-engine/include/vpu/vpu_config.hpp
-inference-engine/include/vpu/vpu_plugin_config.hpp
+openvino/src/bindings/c/docs/api_overview.md
 openvino/docs/benchmarks/performance_int8_vs_fp32.md
 openvino/docs/get_started/get_started_macos.md
-inference-engine/include/details/ie_so_pointer.hpp
-inference-engine/include/ie_compound_blob.h
-inference-engine/include/ie_data.h
-inference-engine/include/ie_blob.h
-inference-engine/include/ie_precision.hpp
-inference-engine/include/ie_remote_context.hpp
-inference-engine/include/gpu/gpu_context_api_dx.hpp
 build/docs/openvino_docs.xml
 openvino/docs/install_guides/installing-openvino-linux-ivad-vpu.md
-inference-engine/src/inference_engine/include/ie/ie_parallel.hpp
-inference-engine/src/inference_engine/include/ie/ie_plugin_config.hpp
-inference-engine/src/inference_engine/include/ie/vpu/myriad_config.hpp
-inference-engine/src/inference_engine/include/ie/vpu/vpu_config.hpp
-inference-engine/src/inference_engine/include/ie/vpu/vpu_plugin_config.hpp
+src/inference/include/ie/ie_parallel.hpp
+src/inference/include/ie/ie_plugin_config.hpp
+src/inference/include/ie/vpu/myriad_config.hpp
+src/inference/include/ie/vpu/vpu_config.hpp
+src/inference/include/ie/vpu/vpu_plugin_config.hpp
|
@ -824,7 +824,7 @@ WARN_LOGFILE = "@DOCS_BUILD_DIR@/ie_docs.log"
# Note: If this tag is empty the current directory is searched.

INPUT = "@DOCS_BUILD_DIR@" \
        "@IE_SOURCE_DIR@/src/inference_engine/include"
        "@OV_INFERENCE_DIR@/include"

# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
@ -24,9 +24,9 @@ GENERATE_TAGFILE = "@DOCS_BUILD_DIR@/ie_plugin_api.tag"
EXTRACT_LOCAL_CLASSES = NO

INPUT = "@DOCS_BUILD_DIR@/docs/IE_PLUGIN_DG" \
        "@IE_SOURCE_DIR@/src/plugin_api" \
        "@IE_SOURCE_DIR@/src/transformations/include" \
        "@OpenVINO_SOURCE_DIR@/openvino/itt/include/openvino"
        "@OV_INFERENCE_DIR@/dev_api" \
        "@OpenVINO_SOURCE_DIR@/src/common/transformations/include" \
        "@OpenVINO_SOURCE_DIR@/src/common/itt/include/openvino"


RECURSIVE = YES
@ -28,9 +28,9 @@ FILE_PATTERNS = *.cpp \

LAYOUT_FILE = "@NGRAPH_CPP_LAYOUT_BUILD@"

INPUT = "@NGRAPH_DIR@/core/include/" \
        "@NGRAPH_DIR@/frontend/onnx/frontend/include/" \
        "@NGRAPH_DIR@/frontend/paddlepaddle/frontend/include/"
INPUT = "@CORE_DIR@/core/include/" \
        "@FRONTENDS_DIR@/onnx/frontend/include/" \
        "@FRONTENDS_DIR@/paddlepaddle/frontend/include/"

HTML_OUTPUT = "@NGRAPH_CPP_OUTPUT@"

@ -11,7 +11,7 @@ Before creating a transformation, do the following:

### Transformation Library Structure
The transformation library is independent from the Inference Engine target library; it is named `inference_engine_transformations`
and is located in the `inference-engine/src/transformations` directory.
and is located in the `src/common/transformations` directory.

The transformations root directory contains two folders:
* `ngraph_ops` - Contains internal opset operations that are common for plugins.
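Since this hunk relocates the transformation library, a minimal sketch of the kind of pass it hosts may help orient readers; the class name is hypothetical and the exact base-class headers may differ slightly between releases:

```cpp
#include <ngraph/pass/pass.hpp>
#include <ngraph/pass/manager.hpp>

// Hypothetical pass for illustration only; real passes live under
// src/common/transformations after this change.
class MyFunctionPass : public ngraph::pass::FunctionPass {
public:
    NGRAPH_RTTI_DECLARATION;
    bool run_on_function(std::shared_ptr<ngraph::Function> f) override {
        // Inspect or rewrite nodes here; return true if the function changed.
        return false;
    }
};
NGRAPH_RTTI_DEFINITION(MyFunctionPass, "MyFunctionPass", 0);

void apply(std::shared_ptr<ngraph::Function> f) {
    ngraph::pass::Manager manager;
    manager.register_pass<MyFunctionPass>();
    manager.run_passes(f);
}
```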
@ -14,3 +14,5 @@ std::shared_ptr<ngraph::Function> nGraph;
...
ngraph::pass::VisualizeTree("after.png").run_on_function(nGraph); // Visualize the nGraph function to an image
```

> **NOTE**: Graphviz is required to render the visualization to an image.
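The same pass can also be scheduled through a pass manager rather than run directly; a minimal sketch, assuming the standard nGraph headers:

```cpp
#include <ngraph/pass/manager.hpp>
#include <ngraph/pass/visualize_tree.hpp>

void dump_graph(std::shared_ptr<ngraph::Function> f) {
    ngraph::pass::Manager manager;
    // Renders the function to after.png; Graphviz must be installed.
    manager.register_pass<ngraph::pass::VisualizeTree>("after.png");
    manager.run_passes(f);
}
```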
@ -14,7 +14,7 @@ using namespace InferenceEngine;
auto it = std::find(keys.begin(), keys.end(), METRIC_KEY(IMPORT_EXPORT_SUPPORT));

// If the 'IMPORT_EXPORT_SUPPORT' metric exists, check its value
bool cachingSupported = (it != keys.end()) && ie.GetMetric(deviceName, METRIC_KEY(IMPORT_EXPORT_SUPPORT));
bool cachingSupported = (it != keys.end()) && ie.GetMetric(deviceName, METRIC_KEY(IMPORT_EXPORT_SUPPORT)).as<bool>();
//! [part3]
return 0;
}
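The changed line makes the conversion from `InferenceEngine::Parameter` to `bool` explicit via `.as<bool>()` instead of relying on an implicit conversion. A self-contained sketch of the full lookup (the device name is a placeholder):

```cpp
#include <algorithm>
#include <string>
#include <vector>
#include <ie_core.hpp>

bool device_supports_model_caching(const std::string& deviceName) {
    InferenceEngine::Core ie;
    auto keys = ie.GetMetric(deviceName, METRIC_KEY(SUPPORTED_METRICS))
                    .as<std::vector<std::string>>();
    auto it = std::find(keys.begin(), keys.end(), METRIC_KEY(IMPORT_EXPORT_SUPPORT));
    // GetMetric returns an InferenceEngine::Parameter; convert it explicitly.
    return it != keys.end() &&
           ie.GetMetric(deviceName, METRIC_KEY(IMPORT_EXPORT_SUPPORT)).as<bool>();
}
```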
@ -21,7 +21,7 @@ addIeTargetTest(
)

if(ENABLE_HETERO)
    add_dependencies(${TARGET_NAME} HeteroPlugin)
    add_dependencies(${TARGET_NAME} ov_hetero_plugin)
endif()

find_package(OpenCV QUIET COMPONENTS core imgproc)
@ -0,0 +1,481 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include "openvino/opsets/opset8.hpp"
#include "openvino/opsets/opset1.hpp"
#include "base_reference_test.hpp"

using namespace reference_tests;
using namespace ov;

namespace {
struct MatrixNmsParams {
    MatrixNmsParams(
        const opset8::MatrixNms::Attributes& attrs,
        const Tensor& boxes, const Tensor& scores,
        const Tensor& expectedSelectedScores, const Tensor& expectedSelectedIndices,
        const Tensor& expectedValidOutputs, const std::string& testcaseName = "") :
        attrs(attrs),
        boxes(boxes), scores(scores),
        expectedSelectedScores(expectedSelectedScores), expectedSelectedIndices(expectedSelectedIndices),
        expectedValidOutputs(expectedValidOutputs), testcaseName(testcaseName) {}

    opset8::MatrixNms::Attributes attrs;
    Tensor boxes;
    Tensor scores;
    Tensor expectedSelectedScores;
    Tensor expectedSelectedIndices;
    Tensor expectedValidOutputs;
    std::string testcaseName;
};

class ReferenceMatrixNmsTest : public testing::TestWithParam<MatrixNmsParams>, public CommonReferenceTest {
public:
    void SetUp() override {
        auto params = GetParam();
        function = CreateFunction(params);
        inputData = {params.boxes.data, params.scores.data};
        refOutData = {params.expectedSelectedScores.data,
                      params.expectedSelectedIndices.data,
                      params.expectedValidOutputs.data};
    }

    static std::string getTestCaseName(const testing::TestParamInfo<MatrixNmsParams>& obj) {
        auto param = obj.param;
        std::ostringstream result;
        result << "bType=" << param.boxes.type;
        result << "_bShape=" << param.boxes.shape;
        result << "_sType=" << param.scores.type;
        result << "_sShape=" << param.scores.shape;
        result << "_escType=" << param.expectedSelectedScores.type;
        result << "_escShape=" << param.expectedSelectedScores.shape;
        result << "_esiType=" << param.expectedSelectedIndices.type;
        result << "_esiShape=" << param.expectedSelectedIndices.shape;
        result << "_evoType=" << param.expectedValidOutputs.type;
        result << "_evoShape=" << param.expectedValidOutputs.shape;
        if (param.testcaseName != "") {
            result << "_=" << param.testcaseName;
        }
        return result.str();
    }

private:
    static std::shared_ptr<Function> CreateFunction(const MatrixNmsParams& params) {
        const auto boxes = std::make_shared<opset1::Parameter>(params.boxes.type, PartialShape::dynamic());
        const auto scores = std::make_shared<opset1::Parameter>(params.scores.type, PartialShape::dynamic());
        const auto nms = std::make_shared<opset8::MatrixNms>(boxes, scores, params.attrs);
        const auto f = std::make_shared<Function>(nms->outputs(), ParameterVector{boxes, scores});
        return f;
    }
};

TEST_P(ReferenceMatrixNmsTest, CompareWithRefs) {
    Exec();
}

template <element::Type_t ET, element::Type_t ET_TH, element::Type_t ET_IND>
std::vector<MatrixNmsParams> generateParams() {
    using T = typename element_type_traits<ET>::value_type;
    using T_TH = typename element_type_traits<ET_TH>::value_type;
    using T_IND = typename element_type_traits<ET_IND>::value_type;
    std::vector<MatrixNmsParams> params {
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::SCORE,  // sort_result_type
                false,                                     // sort_result_across_batch
                ET_IND,                                    // output_type
                0.0f,                                      // score_threshold
                3,                                         // nms_top_k
                -1,                                        // keep_top_k
                0,                                         // background_class
                opset8::MatrixNms::DecayFunction::LINEAR,  // decay_function
                2.0f,                                      // gaussian_sigma
                0.0f,                                      // post_threshold
                true,                                      // normalized
            },
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {3, 6}, std::vector<T_TH>{
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.8, 0.00,
                10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1, 1.0, 1.1}),  // expected_selected_scores
            Tensor(ET_IND, {3, 1}, std::vector<T_IND>{0, 3, 1}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{3}),  // expected_valid_outputs
            "matrix_nms_output_type_i64"),
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::SCORE,  // sort_result_type
                false,                                     // sort_result_across_batch
                ET_IND,                                    // output_type
                0.0f,                                      // score_threshold
                3,                                         // nms_top_k
                -1,                                        // keep_top_k
                0,                                         // background_class
                opset8::MatrixNms::DecayFunction::LINEAR,  // decay_function
                2.0f,                                      // gaussian_sigma
                0.0f,                                      // post_threshold
                true,                                      // normalized
            },
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {3, 6}, std::vector<T_TH>{
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.8, 0.00,
                10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1, 1.0, 1.1}),  // expected_selected_scores
            Tensor(ET_IND, {3, 1}, std::vector<T_IND>{0, 3, 1}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{3}),  // expected_valid_outputs
            "matrix_nms_output_type_i32"),
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::SCORE,    // sort_result_type
                false,                                       // sort_result_across_batch
                ET_IND,                                      // output_type
                0.0f,                                        // score_threshold
                3,                                           // nms_top_k
                -1,                                          // keep_top_k
                0,                                           // background_class
                opset8::MatrixNms::DecayFunction::GAUSSIAN,  // decay_function
                2.0f,                                        // gaussian_sigma
                0.0f,                                        // post_threshold
                true,                                        // normalized
            },
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {3, 6}, std::vector<T_TH>{
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.8, 0.00,
                10.00, 1.00, 11.00, 1.00, 0.1966116, 0.0, 0.1, 1.0, 1.1}),  // expected_selected_scores
            Tensor(ET_IND, {3, 1}, std::vector<T_IND>{0, 3, 1}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{3}),  // expected_valid_outputs
            "matrix_nms_gaussian"),
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::SCORE,  // sort_result_type
                false,                                     // sort_result_across_batch
                ET_IND,                                    // output_type
                0.0f,                                      // score_threshold
                3,                                         // nms_top_k
                -1,                                        // keep_top_k
                0,                                         // background_class
                opset8::MatrixNms::DecayFunction::LINEAR,  // decay_function
                2.0f,                                      // gaussian_sigma
                0.0f,                                      // post_threshold
                true,                                      // normalized
            },
            Tensor(ET, {2, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {2, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3,
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {6, 6}, std::vector<T_TH>{
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.8, 0.00, 10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1, 1.0, 1.1,
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.8, 0.00, 10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1, 1.0, 1.1}),
                // expected_selected_scores
            Tensor(ET_IND, {6, 1}, std::vector<T_IND>{0, 3, 1, 6, 9, 7}),  // expected_selected_indices
            Tensor(ET_IND, {2}, std::vector<T_IND>{3, 3}),  // expected_valid_outputs
            "matrix_nms_two_batches_two_classes"),
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::SCORE,  // sort_result_type
                true,                                      // sort_result_across_batch
                ET_IND,                                    // output_type
                0.0f,                                      // score_threshold
                3,                                         // nms_top_k
                -1,                                        // keep_top_k
                -1,                                        // background_class
                opset8::MatrixNms::DecayFunction::LINEAR,  // decay_function
                2.0f,                                      // gaussian_sigma
                0.5f,                                      // post_threshold
                true,                                      // normalized
            },
            Tensor(ET, {2, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {2, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3,
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {8, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00,
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00,
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
                0.00, 0.90, 0.00, 0.00, 1.00, 1.00,
                0.00, 0.90, 0.00, 0.00, 1.00, 1.00,
                1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
                1.00, 0.80, 0.00, 10.00, 1.00, 11.00}),  // expected_selected_scores
            Tensor(ET_IND, {8, 1}, std::vector<T_IND>{3, 0, 9, 6, 0, 6, 3, 9}),  // expected_selected_indices
            Tensor(ET_IND, {2}, std::vector<T_IND>{4, 4}),  // expected_valid_outputs
            "matrix_nms_two_batches_two_classes_by_score_cross_batch"),
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::CLASSID,  // sort_result_type
                true,                                        // sort_result_across_batch
                ET_IND,                                      // output_type
                0.0f,                                        // score_threshold
                3,                                           // nms_top_k
                -1,                                          // keep_top_k
                -1,                                          // background_class
                opset8::MatrixNms::DecayFunction::LINEAR,    // decay_function
                2.0f,                                        // gaussian_sigma
                0.5f,                                        // post_threshold
                true,                                        // normalized
            },
            Tensor(ET, {2, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {2, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3,
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {8, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00,
                0.00, 0.90, 0.00, 0.00, 1.00, 1.00,
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00,
                0.00, 0.90, 0.00, 0.00, 1.00, 1.00,
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
                1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
                1.00, 0.80, 0.00, 10.00, 1.00, 11.00}),  // expected_selected_scores
            Tensor(ET_IND, {8, 1}, std::vector<T_IND>{3, 0, 9, 6, 0, 3, 6, 9}),  // expected_selected_indices
            Tensor(ET_IND, {2}, std::vector<T_IND>{4, 4}),  // expected_valid_outputs
            "matrix_nms_two_batches_two_classes_by_classid_cross_batch"),
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::CLASSID,  // sort_result_type
                false,                                       // sort_result_across_batch
                ET_IND,                                      // output_type
                0.0f,                                        // score_threshold
                3,                                           // nms_top_k
                3,                                           // keep_top_k
                0,                                           // background_class
                opset8::MatrixNms::DecayFunction::LINEAR,    // decay_function
                2.0f,                                        // gaussian_sigma
                0.0f,                                        // post_threshold
                true,                                        // normalized
            },
            Tensor(ET, {2, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {2, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3,
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {6, 6}, std::vector<T_TH>{
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.8, 0.00, 10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1, 1.0, 1.1,
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.8, 0.00, 10.00, 1.00, 11.00, 1.00, 0.13636364, 0.0, 0.1, 1.0, 1.1}),
                // expected_selected_scores
            Tensor(ET_IND, {6, 1}, std::vector<T_IND>{0, 3, 1, 6, 9, 7}),  // expected_selected_indices
            Tensor(ET_IND, {2}, std::vector<T_IND>{3, 3}),  // expected_valid_outputs
            "matrix_nms_by_keep_top_k"),
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::SCORE,  // sort_result_type
                false,                                     // sort_result_across_batch
                ET_IND,                                    // output_type
                0.0f,                                      // score_threshold
                3,                                         // nms_top_k
                -1,                                        // keep_top_k
                -1,                                        // background_class
                opset8::MatrixNms::DecayFunction::LINEAR,  // decay_function
                2.0f,                                      // gaussian_sigma
                0.0f,                                      // post_threshold
                true,                                      // normalized
            },
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {6, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.0, 10.0, 1.0, 11.0, 1.00, 0.95, 0.0, 0.0, 1.0, 1.0, 0.00, 0.9, 0.0, 0.0, 1.0, 1.0,
                1.00, 0.8, 0.0, 10.0, 1.0, 11.0, 0.00, 0.13636364, 0.0, 0.1, 1.0, 1.1, 1.00, 0.13636364, 0.0, 0.1, 1.0, 1.1}),
                // expected_selected_scores
            Tensor(ET_IND, {6, 1}, std::vector<T_IND>{3, 0, 0, 3, 1, 1}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{6}),  // expected_valid_outputs
            "matrix_nms_background"),
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::SCORE,  // sort_result_type
                false,                                     // sort_result_across_batch
                ET_IND,                                    // output_type
                0.0f,                                      // score_threshold
                3,                                         // nms_top_k
                -1,                                        // keep_top_k
                -1,                                        // background_class
                opset8::MatrixNms::DecayFunction::LINEAR,  // decay_function
                2.0f,                                      // gaussian_sigma
                0.0f,                                      // post_threshold
                true,                                      // normalized
            },
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                1.0, 1.0, 0.0, 0.0, 0.0, 0.1, 1.0, 1.1, 0.0, 0.9, 1.0, -0.1,
                0.0, 10.0, 1.0, 11.0, 1.0, 10.1, 0.0, 11.1, 1.0, 101.0, 0.0, 100.0}),  // boxes
            Tensor(ET_TH, {1, 1, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {3, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.0, 10.0, 1.0, 11.0, 0.00, 0.9, 1.0, 1.0, 0.0, 0.0, 0.00, 0.75, 0.0, 0.1, 1.0, 1.1}),
                // expected_selected_scores
            Tensor(ET_IND, {3, 1}, std::vector<T_IND>{3, 0, 1}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{3}),  // expected_valid_outputs
            "matrix_nms_flipped_coordinates"),
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::SCORE,  // sort_result_type
                false,                                     // sort_result_across_batch
                ET_IND,                                    // output_type
                0.0f,                                      // score_threshold
                3,                                         // nms_top_k
                -1,                                        // keep_top_k
                -1,                                        // background_class
                opset8::MatrixNms::DecayFunction::LINEAR,  // decay_function
                2.0f,                                      // gaussian_sigma
                0.8f,                                      // post_threshold
                true,                                      // normalized
            },
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 1, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {2, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.9, 0.00, 0.00, 1.00, 1.00}),
                // expected_selected_scores
            Tensor(ET_IND, {2, 1}, std::vector<T_IND>{3, 0}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{2}),  // expected_valid_outputs
            "matrix_nms_post_threshold"),
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::SCORE,  // sort_result_type
                false,                                     // sort_result_across_batch
                ET_IND,                                    // output_type
                0.0f,                                      // score_threshold
                3,                                         // nms_top_k
                -1,                                        // keep_top_k
                -1,                                        // background_class
                opset8::MatrixNms::DecayFunction::LINEAR,  // decay_function
                2.0f,                                      // gaussian_sigma
                0.3f,                                      // post_threshold
                true,                                      // normalized
            },
            Tensor(ET, {1, 10, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0,
                1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0,
                0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0}),  // boxes
            Tensor(ET_TH, {1, 1, 10}, std::vector<T_TH>{
                0.4, 0.01, 0.2, 0.09, 0.15, 0.05, 0.02, 0.03, 0.05, 0.0}),  // scores
            Tensor(ET_TH, {1, 6}, std::vector<T_TH>{
                0.00, 0.40, 0.00, 0.00, 1.00, 1.00}),  // expected_selected_scores
            Tensor(ET_IND, {1, 1}, std::vector<T_IND>{0}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{1}),  // expected_valid_outputs
            "matrix_nms_identical_boxes"),
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::SCORE,  // sort_result_type
                false,                                     // sort_result_across_batch
                ET_IND,                                    // output_type
                0.0f,                                      // score_threshold
                2,                                         // nms_top_k
                -1,                                        // keep_top_k
                -1,                                        // background_class
                opset8::MatrixNms::DecayFunction::LINEAR,  // decay_function
                2.0f,                                      // gaussian_sigma
                0.0f,                                      // post_threshold
                true,                                      // normalized
            },
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 1, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {2, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00, 0.00, 1.00, 1.00}),
                // expected_selected_scores
            Tensor(ET_IND, {2, 1}, std::vector<T_IND>{3, 0}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{2}),  // expected_valid_outputs
            "matrix_nms_nms_top_k"),
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::SCORE,  // sort_result_type
                false,                                     // sort_result_across_batch
                ET_IND,                                    // output_type
                0.0f,                                      // score_threshold
                3,                                         // nms_top_k
                -1,                                        // keep_top_k
                -1,                                        // background_class
                opset8::MatrixNms::DecayFunction::LINEAR,  // decay_function
                2.0f,                                      // gaussian_sigma
                0.0f,                                      // post_threshold
                true,                                      // normalized
            },
            Tensor(ET, {1, 1, 4}, std::vector<T>{0.0, 0.0, 1.0, 1.0}),  // boxes
            Tensor(ET_TH, {1, 1, 1}, std::vector<T_TH>{0.9}),  // scores
            Tensor(ET_TH, {1, 6}, std::vector<T_TH>{
                0.00, 0.90, 0.00, 0.00, 1.00, 1.00}),  // expected_selected_scores
            Tensor(ET_IND, {1, 1}, std::vector<T_IND>{0}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{1}),  // expected_valid_outputs
            "matrix_nms_single_box"),
        MatrixNmsParams(
            {
                opset8::MatrixNms::SortResultType::SCORE,  // sort_result_type
                false,                                     // sort_result_across_batch
                ET_IND,                                    // output_type
                2.0f,                                      // score_threshold
                3,                                         // nms_top_k
                -1,                                        // keep_top_k
                -1,                                        // background_class
                opset8::MatrixNms::DecayFunction::LINEAR,  // decay_function
                2.0f,                                      // gaussian_sigma
                0.0f,                                      // post_threshold
                true,                                      // normalized
            },
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 1, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {0, 6}, std::vector<T_TH>{}),  // expected_selected_scores
            Tensor(ET_IND, {0, 1}, std::vector<T_IND>{}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{0}),  // expected_valid_outputs
            "matrix_nms_no_output"),
    };
    return params;
}

std::vector<MatrixNmsParams> generateCombinedParams() {
    const std::vector<std::vector<MatrixNmsParams>> generatedParams {
        generateParams<element::Type_t::bf16, element::Type_t::f32, element::Type_t::i32>(),
        generateParams<element::Type_t::f16, element::Type_t::f32, element::Type_t::i32>(),
        generateParams<element::Type_t::f32, element::Type_t::f32, element::Type_t::i32>(),
        generateParams<element::Type_t::bf16, element::Type_t::f32, element::Type_t::i64>(),
        generateParams<element::Type_t::f16, element::Type_t::f32, element::Type_t::i64>(),
        generateParams<element::Type_t::f32, element::Type_t::f32, element::Type_t::i64>(),
    };
    std::vector<MatrixNmsParams> combinedParams;

    for (const auto& params : generatedParams) {
        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
    }
    return combinedParams;
}

INSTANTIATE_TEST_SUITE_P(smoke_MatrixNms_With_Hardcoded_Refs, ReferenceMatrixNmsTest,
    testing::ValuesIn(generateCombinedParams()), ReferenceMatrixNmsTest::getTestCaseName);
} // namespace
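The hard-coded reference scores in the cases above are consistent with per-pair decay factors of the form below; the exact formulas are inferred from the test data itself rather than quoted from a spec. For box 1 ([0, 0.1, 1, 1.1]) suppressed by box 0 ([0, 0, 1, 1]), the IoU is 0.9 / 1.1 ≈ 0.8182, and with an initial score of 0.75 and gaussian_sigma = 2:

$$
s_{\text{linear}} = s\,(1 - \mathrm{iou}) = 0.75 \times 0.1818 \approx 0.13636, \qquad
s_{\text{gaussian}} = s\,e^{-\sigma\,\mathrm{iou}^2} = 0.75\,e^{-2 \times 0.8182^2} \approx 0.19661,
$$

matching the 0.13636364 and 0.1966116 entries in `expected_selected_scores`.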
@ -0,0 +1,567 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include "openvino/op/multiclass_nms.hpp"
#include "base_reference_test.hpp"

using namespace reference_tests;
using namespace ov;

namespace {
struct MulticlassNmsParams {
    MulticlassNmsParams(
        const int nms_top_k,
        const float iou_threshold,
        const float score_threshold,
        const op::v8::MulticlassNms::SortResultType sort_result_type,
        const int keep_top_k,
        const int background_class,
        const float nms_eta,
        const ov::element::Type output_type,
        const bool sort_result_across_batch,
        const bool normalized,
        const Tensor& boxes, const Tensor& scores,
        const Tensor& expectedSelectedScores, const Tensor& expectedSelectedIndices,
        const Tensor& expectedValidOutputs, const std::string& testcaseName = "") :
        nms_top_k(nms_top_k),
        iou_threshold(iou_threshold),
        score_threshold(score_threshold),
        sort_result_type(sort_result_type),
        keep_top_k(keep_top_k),
        background_class(background_class),
        nms_eta(nms_eta),
        output_type(output_type),
        sort_result_across_batch(sort_result_across_batch),
        normalized(normalized),
        boxes(boxes), scores(scores),
        expectedSelectedScores(expectedSelectedScores), expectedSelectedIndices(expectedSelectedIndices),
        expectedValidOutputs(expectedValidOutputs), testcaseName(testcaseName) {}

    int nms_top_k;
    float iou_threshold;
    float score_threshold;
    op::v8::MulticlassNms::SortResultType sort_result_type;
    int keep_top_k;
    int background_class;
    float nms_eta;
    ov::element::Type output_type;

    bool sort_result_across_batch = false;
    bool normalized = true;

    Tensor boxes;
    Tensor scores;
    Tensor expectedSelectedScores;
    Tensor expectedSelectedIndices;
    Tensor expectedValidOutputs;
    std::string testcaseName;
};

class ReferenceMulticlassNmsTest : public testing::TestWithParam<MulticlassNmsParams>, public CommonReferenceTest {
public:
    void SetUp() override {
        auto params = GetParam();
        function = CreateFunction(params);
        inputData = {params.boxes.data, params.scores.data};
        refOutData = {params.expectedSelectedScores.data,
                      params.expectedSelectedIndices.data,
                      params.expectedValidOutputs.data};
    }

    static std::string getTestCaseName(const testing::TestParamInfo<MulticlassNmsParams>& obj) {
        auto param = obj.param;
        std::ostringstream result;
        result << "bType=" << param.boxes.type;
        result << "_bShape=" << param.boxes.shape;
        result << "_sType=" << param.scores.type;
        result << "_sShape=" << param.scores.shape;
        result << "_escType=" << param.expectedSelectedScores.type;
        result << "_escShape=" << param.expectedSelectedScores.shape;
        result << "_esiType=" << param.expectedSelectedIndices.type;
        result << "_esiShape=" << param.expectedSelectedIndices.shape;
        result << "_evoType=" << param.expectedValidOutputs.type;
        result << "_evoShape=" << param.expectedValidOutputs.shape;
        if (param.testcaseName != "") {
            result << "_=" << param.testcaseName;
        }
        return result.str();
    }

private:
    static std::shared_ptr<Function> CreateFunction(const MulticlassNmsParams& params) {
        op::v8::MulticlassNms::Attributes attrs;
        attrs.nms_top_k = params.nms_top_k;
        attrs.iou_threshold = params.iou_threshold;
        attrs.score_threshold = params.score_threshold;
        attrs.sort_result_type = params.sort_result_type;
        attrs.keep_top_k = params.keep_top_k;
        attrs.background_class = params.background_class;
        attrs.nms_eta = params.nms_eta;
        attrs.output_type = params.output_type;
        attrs.sort_result_across_batch = params.sort_result_across_batch;
        attrs.normalized = params.normalized;
        const auto boxes = std::make_shared<op::v0::Parameter>(params.boxes.type, PartialShape::dynamic());
        const auto scores = std::make_shared<op::v0::Parameter>(params.scores.type, PartialShape::dynamic());
        const auto nms = std::make_shared<op::v8::MulticlassNms>(boxes, scores, attrs);
        const auto f = std::make_shared<Function>(nms->outputs(), ParameterVector{boxes, scores});
        return f;
    }
};

TEST_P(ReferenceMulticlassNmsTest, CompareWithRefs) {
    Exec();
}

template <element::Type_t ET, element::Type_t ET_TH, element::Type_t ET_IND>
std::vector<MulticlassNmsParams> generateParams() {
    using T = typename element_type_traits<ET>::value_type;
    using T_TH = typename element_type_traits<ET_TH>::value_type;
    using T_IND = typename element_type_traits<ET_IND>::value_type;
    std::vector<MulticlassNmsParams> params {
        MulticlassNmsParams(
            3,                                             // nms_top_k
            0.5f,                                          // iou_threshold
            0.0f,                                          // score_threshold
            op::v8::MulticlassNms::SortResultType::SCORE,  // sort_result_type
            -1,                                            // keep_top_k
            -1,                                            // background_class
            1.0f,                                          // nms_eta
            ET_IND,                                        // output_type
            false,                                         // sort_result_across_batch
            true,                                          // normalized
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {4, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 1.00, 0.95,
                0.00, 0.00, 1.00, 1.00, 0.00, 0.90, 0.00, 0.00,
                1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00}),  // expected_selected_scores
            Tensor(ET_IND, {4, 1}, std::vector<T_IND>{3, 0, 0, 3}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{4}),  // expected_valid_outputs
            "multiclass_nms_by_score"),
        MulticlassNmsParams(
            3,                                               // nms_top_k
            0.5f,                                            // iou_threshold
            0.0f,                                            // score_threshold
            op::v8::MulticlassNms::SortResultType::CLASSID,  // sort_result_type
            -1,                                              // keep_top_k
            -1,                                              // background_class
            1.0f,                                            // nms_eta
            ET_IND,                                          // output_type
            false,                                           // sort_result_across_batch
            true,                                            // normalized
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {4, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90,
                0.00, 0.00, 1.00, 1.00, 1.00, 0.95, 0.00, 0.00,
                1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00}),  // expected_selected_scores
            Tensor(ET_IND, {4, 1}, std::vector<T_IND>{3, 0, 0, 3}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{4}),  // expected_valid_outputs
            "multiclass_nms_by_class_id"),
        MulticlassNmsParams(
            3,                                               // nms_top_k
            0.5f,                                            // iou_threshold
            0.0f,                                            // score_threshold
            op::v8::MulticlassNms::SortResultType::CLASSID,  // sort_result_type
            -1,                                              // keep_top_k
            -1,                                              // background_class
            1.0f,                                            // nms_eta
            ET_IND,                                          // output_type
            false,                                           // sort_result_across_batch
            true,                                            // normalized
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {4, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90,
                0.00, 0.00, 1.00, 1.00, 1.00, 0.95, 0.00, 0.00,
                1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00}),  // expected_selected_scores
            Tensor(ET_IND, {4, 1}, std::vector<T_IND>{3, 0, 0, 3}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{4}),  // expected_valid_outputs
            "multiclass_nms_output_type_i32"),
        MulticlassNmsParams(
            3,                                             // nms_top_k
            0.5f,                                          // iou_threshold
            0.0f,                                          // score_threshold
            op::v8::MulticlassNms::SortResultType::SCORE,  // sort_result_type
            -1,                                            // keep_top_k
            -1,                                            // background_class
            1.0f,                                          // nms_eta
            ET_IND,                                        // output_type
            false,                                         // sort_result_across_batch
            true,                                          // normalized
            Tensor(ET, {2, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {2, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3,
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {8, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
                0.00, 0.90, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
                0.00, 0.90, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00}),
                // expected_selected_scores
            Tensor(ET_IND, {8, 1}, std::vector<T_IND>{3, 0, 0, 3, 9, 6, 6, 9}),  // expected_selected_indices
            Tensor(ET_IND, {2}, std::vector<T_IND>{4, 4}),  // expected_valid_outputs
            "multiclass_nms_two_batches_two_classes_by_score"),
        MulticlassNmsParams(
            3,                                               // nms_top_k
            0.5f,                                            // iou_threshold
            0.0f,                                            // score_threshold
            op::v8::MulticlassNms::SortResultType::CLASSID,  // sort_result_type
            -1,                                              // keep_top_k
            -1,                                              // background_class
            1.0f,                                            // nms_eta
            ET_IND,                                          // output_type
            false,                                           // sort_result_across_batch
            true,                                            // normalized
            Tensor(ET, {2, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {2, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3,
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {8, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00, 0.00, 1.00, 1.00,
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00, 0.00, 1.00, 1.00,
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00}),
                // expected_selected_scores
            Tensor(ET_IND, {8, 1}, std::vector<T_IND>{3, 0, 0, 3, 9, 6, 6, 9}),  // expected_selected_indices
            Tensor(ET_IND, {2}, std::vector<T_IND>{4, 4}),  // expected_valid_outputs
            "multiclass_nms_two_batches_two_classes_by_class_id"),
        MulticlassNmsParams(
            3,                                             // nms_top_k
            0.5f,                                          // iou_threshold
            0.0f,                                          // score_threshold
            op::v8::MulticlassNms::SortResultType::SCORE,  // sort_result_type
            -1,                                            // keep_top_k
            -1,                                            // background_class
            1.0f,                                          // nms_eta
            ET_IND,                                        // output_type
            true,                                          // sort_result_across_batch
            true,                                          // normalized
            Tensor(ET, {2, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {2, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3,
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {8, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00,
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00,
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
                0.00, 0.90, 0.00, 0.00, 1.00, 1.00,
                0.00, 0.90, 0.00, 0.00, 1.00, 1.00,
                1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
                1.00, 0.80, 0.00, 10.00, 1.00, 11.00}),  // expected_selected_scores
            Tensor(ET_IND, {8, 1}, std::vector<T_IND>{3, 0, 9, 6, 0, 6, 3, 9}),  // expected_selected_indices
            Tensor(ET_IND, {2}, std::vector<T_IND>{4, 4}),  // expected_valid_outputs
            "multiclass_nms_two_batches_two_classes_by_score_cross_batch"),
        MulticlassNmsParams(
            3,                                               // nms_top_k
            0.5f,                                            // iou_threshold
            0.0f,                                            // score_threshold
            op::v8::MulticlassNms::SortResultType::CLASSID,  // sort_result_type
            -1,                                              // keep_top_k
            -1,                                              // background_class
            1.0f,                                            // nms_eta
            ET_IND,                                          // output_type
            true,                                            // sort_result_across_batch
            true,                                            // normalized
            Tensor(ET, {2, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {2, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3,
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {8, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00,
                0.00, 0.90, 0.00, 0.00, 1.00, 1.00,
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00,
                0.00, 0.90, 0.00, 0.00, 1.00, 1.00,
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
                1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
                1.00, 0.80, 0.00, 10.00, 1.00, 11.00}),  // expected_selected_scores
            Tensor(ET_IND, {8, 1}, std::vector<T_IND>{3, 0, 9, 6, 0, 3, 6, 9}),  // expected_selected_indices
            Tensor(ET_IND, {2}, std::vector<T_IND>{4, 4}),  // expected_valid_outputs
            "multiclass_nms_two_batches_two_classes_by_class_id_cross_batch"),
        MulticlassNmsParams(
            3,                                             // nms_top_k
            0.5f,                                          // iou_threshold
            0.0f,                                          // score_threshold
            op::v8::MulticlassNms::SortResultType::SCORE,  // sort_result_type
            -1,                                            // keep_top_k
            -1,                                            // background_class
            1.0f,                                          // nms_eta
            ET_IND,                                        // output_type
            false,                                         // sort_result_across_batch
            true,                                          // normalized
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                1.0, 1.0, 0.0, 0.0, 0.0, 0.1, 1.0, 1.1, 0.0, 0.9, 1.0, -0.1,
                0.0, 10.0, 1.0, 11.0, 1.0, 10.1, 0.0, 11.1, 1.0, 101.0, 0.0, 100.0}),  // boxes
            Tensor(ET_TH, {1, 1, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {3, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 1.00, 1.00, 0.00, 0.00, 0.00, 0.75, 0.00, 0.10, 1.00, 1.10}),
                // expected_selected_scores
            Tensor(ET_IND, {3, 1}, std::vector<T_IND>{3, 0, 1}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{3}),  // expected_valid_outputs
            "multiclass_nms_flipped_coordinates"),
        MulticlassNmsParams(
            3,                                             // nms_top_k
            0.5f,                                          // iou_threshold
            0.0f,                                          // score_threshold
            op::v8::MulticlassNms::SortResultType::SCORE,  // sort_result_type
            -1,                                            // keep_top_k
            -1,                                            // background_class
            1.0f,                                          // nms_eta
            ET_IND,                                        // output_type
            false,                                         // sort_result_across_batch
            true,                                          // normalized
            Tensor(ET, {1, 10, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0,
                1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0,
                0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0}),  // boxes
            Tensor(ET_TH, {1, 1, 10}, std::vector<T_TH>{
                0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9}),  // scores
            Tensor(ET_TH, {1, 6}, std::vector<T_TH>{
                0.00, 0.90, 0.00, 0.00, 1.00, 1.00}),
                // expected_selected_scores
            Tensor(ET_IND, {1, 1}, std::vector<T_IND>{0}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{1}),  // expected_valid_outputs
            "multiclass_nms_identical_boxes"),
        MulticlassNmsParams(
            2,                                             // nms_top_k
            0.5f,                                          // iou_threshold
            0.0f,                                          // score_threshold
            op::v8::MulticlassNms::SortResultType::SCORE,  // sort_result_type
            -1,                                            // keep_top_k
            -1,                                            // background_class
            1.0f,                                          // nms_eta
            ET_IND,                                        // output_type
            false,                                         // sort_result_across_batch
            true,                                          // normalized
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 1, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {2, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00, 0.00, 1.00, 1.00}),
                // expected_selected_scores
            Tensor(ET_IND, {2, 1}, std::vector<T_IND>{3, 0}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{2}),  // expected_valid_outputs
            "multiclass_nms_limit_output_size"),
        MulticlassNmsParams(
            3,                                             // nms_top_k
            0.5f,                                          // iou_threshold
            0.0f,                                          // score_threshold
            op::v8::MulticlassNms::SortResultType::SCORE,  // sort_result_type
            -1,                                            // keep_top_k
            -1,                                            // background_class
            1.0f,                                          // nms_eta
            ET_IND,                                        // output_type
            false,                                         // sort_result_across_batch
            true,                                          // normalized
            Tensor(ET, {1, 1, 4}, std::vector<T>{0.0, 0.0, 1.0, 1.0}),  // boxes
            Tensor(ET_TH, {1, 1, 1}, std::vector<T_TH>{0.9}),  // scores
            Tensor(ET_TH, {1, 6}, std::vector<T_TH>{
                0.00, 0.90, 0.00, 0.00, 1.00, 1.00}),  // expected_selected_scores
            Tensor(ET_IND, {1, 1}, std::vector<T_IND>{0}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{1}),  // expected_valid_outputs
            "multiclass_nms_single_box"),
        MulticlassNmsParams(
            3,                                             // nms_top_k
            0.2f,                                          // iou_threshold
            0.0f,                                          // score_threshold
            op::v8::MulticlassNms::SortResultType::SCORE,  // sort_result_type
            -1,                                            // keep_top_k
            -1,                                            // background_class
            1.0f,                                          // nms_eta
            ET_IND,                                        // output_type
            false,                                         // sort_result_across_batch
            true,                                          // normalized
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 1, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {2, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00, 0.00, 1.00, 1.00}),
                // expected_selected_scores
            Tensor(ET_IND, {2, 1}, std::vector<T_IND>{3, 0}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{2}),  // expected_valid_outputs
            "multiclass_nms_by_IOU"),
        MulticlassNmsParams(
            3,                                             // nms_top_k
            0.5f,                                          // iou_threshold
            0.95f,                                         // score_threshold
            op::v8::MulticlassNms::SortResultType::SCORE,  // sort_result_type
            -1,                                            // keep_top_k
            -1,                                            // background_class
            1.0f,                                          // nms_eta
            ET_IND,                                        // output_type
            false,                                         // sort_result_across_batch
            true,                                          // normalized
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 1, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {1, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00}),  // expected_selected_scores
            Tensor(ET_IND, {1, 1}, std::vector<T_IND>{3}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{1}),  // expected_valid_outputs
            "multiclass_nms_by_IOU_and_scores"),
        MulticlassNmsParams(
            3,                                             // nms_top_k
            0.5f,                                          // iou_threshold
            2.0f,                                          // score_threshold
            op::v8::MulticlassNms::SortResultType::SCORE,  // sort_result_type
            -1,                                            // keep_top_k
            -1,                                            // background_class
            1.0f,                                          // nms_eta
            ET_IND,                                        // output_type
            false,                                         // sort_result_across_batch
            true,                                          // normalized
            Tensor(ET, {1, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {1, 1, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {0, 6}, std::vector<T_TH>{}),  // expected_selected_scores
            Tensor(ET_IND, {0, 1}, std::vector<T_IND>{}),  // expected_selected_indices
            Tensor(ET_IND, {1}, std::vector<T_IND>{0}),  // expected_valid_outputs
            "multiclass_nms_no_output"),
        MulticlassNmsParams(
            3,                                               // nms_top_k
            0.5f,                                            // iou_threshold
            0.0f,                                            // score_threshold
            op::v8::MulticlassNms::SortResultType::CLASSID,  // sort_result_type
            -1,                                              // keep_top_k
            0,                                               // background_class
            1.0f,                                            // nms_eta
            ET_IND,                                          // output_type
            false,                                           // sort_result_across_batch
            true,                                            // normalized
            Tensor(ET, {2, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {2, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3,
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {4, 6}, std::vector<T_TH>{
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
                1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00}),
                // expected_selected_scores
            Tensor(ET_IND, {4, 1}, std::vector<T_IND>{0, 3, 6, 9}),  // expected_selected_indices
            Tensor(ET_IND, {2}, std::vector<T_IND>{2, 2}),  // expected_valid_outputs
            "multiclass_nms_by_background"),
        MulticlassNmsParams(
            3,                                               // nms_top_k
            0.5f,                                            // iou_threshold
            0.0f,                                            // score_threshold
            op::v8::MulticlassNms::SortResultType::CLASSID,  // sort_result_type
            3,                                               // keep_top_k
            -1,                                              // background_class
            1.0f,                                            // nms_eta
            ET_IND,                                          // output_type
            false,                                           // sort_result_across_batch
            true,                                            // normalized
            Tensor(ET, {2, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {2, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3,
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {6, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00,
                0.00, 1.00, 1.00, 1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00,
                0.00, 1.00, 1.00, 1.00, 0.95, 0.00, 0.00, 1.00, 1.00}),  // expected_selected_scores
            Tensor(ET_IND, {6, 1}, std::vector<T_IND>{3, 0, 0, 9, 6, 6}),  // expected_selected_indices
            Tensor(ET_IND, {2}, std::vector<T_IND>{3, 3}),  // expected_valid_outputs
            "multiclass_nms_by_keep_top_k"),
        MulticlassNmsParams(
            -1,                                              // nms_top_k
            1.0f,                                            // iou_threshold
            0.0f,                                            // score_threshold
            op::v8::MulticlassNms::SortResultType::CLASSID,  // sort_result_type
            -1,                                              // keep_top_k
            -1,                                              // background_class
            0.1f,                                            // nms_eta
            ET_IND,                                          // output_type
            false,                                           // sort_result_across_batch
            true,                                            // normalized
            Tensor(ET, {2, 6, 4}, std::vector<T>{
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
                0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
                0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),  // boxes
            Tensor(ET_TH, {2, 2, 6}, std::vector<T_TH>{
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3,
                0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.95, 0.75, 0.6, 0.80, 0.5, 0.3}),  // scores
            Tensor(ET_TH, {12, 6}, std::vector<T_TH>{
                0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00, 0.00, 1.00, 1.00, 0.00, 0.30, 0.00,
                100.00, 1.00, 101.00, 1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
                1.00, 0.30, 0.00, 100.00, 1.00, 101.00, 0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00,
                0.00, 1.00, 1.00, 0.00, 0.30, 0.00, 100.00, 1.00, 101.00, 1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
                1.00, 0.80, 0.00, 10.00, 1.00, 11.00, 1.00, 0.30, 0.00, 100.00, 1.00, 101.00}),
                // expected_selected_scores
            Tensor(ET_IND, {12, 1}, std::vector<T_IND>{
                3, 0, 5, 0, 3, 5, 9, 6, 11, 6, 9, 11}),  // expected_selected_indices
            Tensor(ET_IND, {2}, std::vector<T_IND>{6, 6}),  // expected_valid_outputs
            "multiclass_nms_by_nms_eta"),
    };
    return params;
}

std::vector<MulticlassNmsParams> generateCombinedParams() {
    const std::vector<std::vector<MulticlassNmsParams>> generatedParams {
        generateParams<element::Type_t::bf16, element::Type_t::f32, element::Type_t::i32>(),
        generateParams<element::Type_t::f16, element::Type_t::f32, element::Type_t::i32>(),
        generateParams<element::Type_t::f32, element::Type_t::f32, element::Type_t::i32>(),
        generateParams<element::Type_t::bf16, element::Type_t::f32, element::Type_t::i64>(),
        generateParams<element::Type_t::f16, element::Type_t::f32, element::Type_t::i64>(),
        generateParams<element::Type_t::f32, element::Type_t::f32, element::Type_t::i64>(),
    };
    std::vector<MulticlassNmsParams> combinedParams;

    for (const auto& params : generatedParams) {
        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
    }
    return combinedParams;
}

INSTANTIATE_TEST_SUITE_P(smoke_MulticlassNms_With_Hardcoded_Refs, ReferenceMulticlassNmsTest,
    testing::ValuesIn(generateCombinedParams()), ReferenceMulticlassNmsTest::getTestCaseName);
} // namespace
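The `multiclass_nms_by_nms_eta` case is consistent with an adaptive IoU threshold (again inferred from the expected outputs rather than quoted from a spec): after each accepted box, while the current threshold is above 0.5 it is scaled by $\eta$,

$$
\mathrm{th} \leftarrow \eta \cdot \mathrm{th} \quad \text{while } \mathrm{th} > 0.5,
$$

so with an initial threshold of 1.0 and $\eta = 0.1$ the threshold drops to 0.1 after the first accepted box per class, which is why only the well-separated boxes (indices 3, 0, 5 within each class) survive.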
@ -0,0 +1,489 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include "openvino/op/non_max_suppression.hpp"
#include "openvino/op/constant.hpp"
#include "base_reference_test.hpp"

using namespace reference_tests;
using namespace ov;

namespace {
struct NonMaxSuppressionParams {
    NonMaxSuppressionParams(
        const Tensor& boxes, const Tensor& scores,
        const Tensor& maxOutputBoxesPerClass, const Tensor& iouThreshold, const Tensor& scoreThreshold,
        const Tensor& softNmsSigma, const op::v5::NonMaxSuppression::BoxEncodingType boxEncoding,
        const Tensor& expectedSelectedIndices, const Tensor& expectedSelectedScores,
        const Tensor& expectedValidOutputs, const std::string& testcaseName = "") :
        boxes(boxes), scores(scores),
        maxOutputBoxesPerClass(maxOutputBoxesPerClass), iouThreshold(iouThreshold), scoreThreshold(scoreThreshold),
        softNmsSigma(softNmsSigma), boxEncoding(boxEncoding),
        expectedSelectedIndices(expectedSelectedIndices), expectedSelectedScores(expectedSelectedScores),
        expectedValidOutputs(expectedValidOutputs), testcaseName(testcaseName) {}

    Tensor boxes;
    Tensor scores;
    Tensor maxOutputBoxesPerClass;
    Tensor iouThreshold;
    Tensor scoreThreshold;
    Tensor softNmsSigma;
    op::v5::NonMaxSuppression::BoxEncodingType boxEncoding;
    Tensor expectedSelectedIndices;
    Tensor expectedSelectedScores;
    Tensor expectedValidOutputs;
    std::string testcaseName;
};

class ReferenceNonMaxSuppressionTest : public testing::TestWithParam<NonMaxSuppressionParams>, public CommonReferenceTest {
public:
    void SetUp() override {
        auto params = GetParam();
        function = CreateFunction(params);
        inputData = {params.boxes.data, params.scores.data};
        refOutData = {params.expectedSelectedIndices.data,
                      params.expectedSelectedScores.data,
                      params.expectedValidOutputs.data};
    }

    static std::string getTestCaseName(const testing::TestParamInfo<NonMaxSuppressionParams>& obj) {
        auto param = obj.param;
        std::ostringstream result;
        result << "bType=" << param.boxes.type;
        result << "_bShape=" << param.boxes.shape;
        result << "_sType=" << param.scores.type;
        result << "_sShape=" << param.scores.shape;
        result << "_escType=" << param.expectedSelectedScores.type;
        result << "_escShape=" << param.expectedSelectedScores.shape;
        result << "_esiType=" << param.expectedSelectedIndices.type;
        result << "_esiShape=" << param.expectedSelectedIndices.shape;
        result << "_evoType=" << param.expectedValidOutputs.type;
result << "_evoShape=" << param.expectedValidOutputs.shape;
if (param.testcaseName != "") {
result << "_=" << param.testcaseName;
}
return result.str();
}

private:
static std::shared_ptr<Function> CreateFunction(const NonMaxSuppressionParams& params) {
const auto boxes = std::make_shared<op::v0::Parameter>(params.boxes.type, params.boxes.shape);
const auto scores = std::make_shared<op::v0::Parameter>(params.scores.type, params.scores.shape);
const auto max_output_boxes_per_class = std::make_shared<op::v0::Constant>(
params.maxOutputBoxesPerClass.type, params.maxOutputBoxesPerClass.shape, params.maxOutputBoxesPerClass.data.data());
const auto iou_threshold = std::make_shared<op::v0::Constant>(
params.iouThreshold.type, params.iouThreshold.shape, params.iouThreshold.data.data());
const auto score_threshold = std::make_shared<op::v0::Constant>(
params.scoreThreshold.type, params.scoreThreshold.shape, params.scoreThreshold.data.data());
const auto soft_nms_sigma = std::make_shared<op::v0::Constant>(
params.softNmsSigma.type, params.softNmsSigma.shape, params.softNmsSigma.data.data());
const auto nms = std::make_shared<op::v5::NonMaxSuppression>(boxes,
scores,
max_output_boxes_per_class,
iou_threshold,
score_threshold,
soft_nms_sigma,
params.boxEncoding,
false);
const auto f = std::make_shared<Function>(nms->outputs(), ParameterVector{boxes, scores});
return f;
}
};

class ReferenceNonMaxSuppressionTestWithoutConstants : public ReferenceNonMaxSuppressionTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateFunction(params);
inputData = {params.boxes.data, params.scores.data, params.maxOutputBoxesPerClass.data,
params.iouThreshold.data, params.scoreThreshold.data, params.softNmsSigma.data};
refOutData = {params.expectedSelectedIndices.data,
params.expectedSelectedScores.data,
params.expectedValidOutputs.data};
}

static std::string getTestCaseName(const testing::TestParamInfo<NonMaxSuppressionParams>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "bType=" << param.boxes.type;
result << "_bShape=" << param.boxes.shape;
result << "_sType=" << param.scores.type;
result << "_sShape=" << param.scores.shape;
result << "_escType=" << param.expectedSelectedScores.type;
result << "_escShape=" << param.expectedSelectedScores.shape;
result << "_esiType=" << param.expectedSelectedIndices.type;
result << "_esiShape=" << param.expectedSelectedIndices.shape;
result << "_evoType=" << param.expectedValidOutputs.type;
result << "_evoShape=" << param.expectedValidOutputs.shape;
if (param.testcaseName != "") {
result << "_=" << param.testcaseName;
}
return result.str();
}

private:
static std::shared_ptr<Function> CreateFunction(const NonMaxSuppressionParams& params) {
const auto boxes = std::make_shared<op::v0::Parameter>(params.boxes.type, params.boxes.shape);
const auto scores = std::make_shared<op::v0::Parameter>(params.scores.type, params.scores.shape);
const auto max_output_boxes_per_class = std::make_shared<op::v0::Parameter>(
params.maxOutputBoxesPerClass.type, params.maxOutputBoxesPerClass.shape);
const auto iou_threshold = std::make_shared<op::v0::Parameter>(
params.iouThreshold.type, params.iouThreshold.shape);
const auto score_threshold = std::make_shared<op::v0::Parameter>(
params.scoreThreshold.type, params.scoreThreshold.shape);
const auto soft_nms_sigma = std::make_shared<op::v0::Parameter>(
params.softNmsSigma.type, params.softNmsSigma.shape);
const auto nms = std::make_shared<op::v5::NonMaxSuppression>(boxes,
scores,
max_output_boxes_per_class,
iou_threshold,
score_threshold,
soft_nms_sigma,
params.boxEncoding,
false);
const auto f = std::make_shared<Function>(nms->outputs(),
ParameterVector{boxes, scores, max_output_boxes_per_class,
iou_threshold, score_threshold, soft_nms_sigma});
return f;
}
};
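// Note: unlike ReferenceNonMaxSuppressionTest, the fixture above feeds
// max_output_boxes_per_class and the three thresholds as runtime Parameters via
// inputData instead of folding them into the function as Constants, so the
// reference path with non-constant attribute inputs is covered as well.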

TEST_P(ReferenceNonMaxSuppressionTest, CompareWithRefs) {
Exec();
}

TEST_P(ReferenceNonMaxSuppressionTestWithoutConstants, CompareWithRefs) {
Exec();
}

template <element::Type_t ET, element::Type_t ET_BOX, element::Type_t ET_TH, element::Type_t ET_IND>
std::vector<NonMaxSuppressionParams> generateParams() {
using T = typename element_type_traits<ET>::value_type;
using T_BOX = typename element_type_traits<ET_BOX>::value_type;
using T_TH = typename element_type_traits<ET_TH>::value_type;
using T_IND = typename element_type_traits<ET_IND>::value_type;
std::vector<NonMaxSuppressionParams> params {
NonMaxSuppressionParams(
// boxes
Tensor(ET, {1, 6, 4}, std::vector<T>{
0.5, 0.5, 1.0, 1.0, 0.5, 0.6, 1.0, 1.0, 0.5, 0.4, 1.0, 1.0,
0.5, 10.5, 1.0, 1.0, 0.5, 10.6, 1.0, 1.0, 0.5, 100.5, 1.0, 1.0}),
// scores
Tensor(ET, {1, 1, 6}, std::vector<T>{
0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),
// max_output_boxes_per_class
Tensor(ET_BOX, {}, std::vector<T_BOX>{3}),
// iou_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.5f}),
// score_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// soft_nms_sigma
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// box_encoding
op::v5::NonMaxSuppression::BoxEncodingType::CENTER,
// selected_indices
Tensor(ET_IND, {3, 3}, std::vector<T_IND>{
0, 0, 3, 0, 0, 0, 0, 0, 5}),
// selected_scores
Tensor(ET_TH, {3, 3}, std::vector<T_TH>{
0.0, 0.0, 0.95, 0.0, 0.0, 0.9, 0.0, 0.0, 0.3}),
// valid_outputs
Tensor(ET_IND, {1}, std::vector<T_IND>{3}),
"nonmaxsuppression_center_point_box_format"),
NonMaxSuppressionParams(
// boxes
Tensor(ET, {1, 6, 4}, std::vector<T>{
1.0, 1.0, 0.0, 0.0, 0.0, 0.1, 1.0, 1.1, 0.0, 0.9, 1.0, -0.1,
0.0, 10.0, 1.0, 11.0, 1.0, 10.1, 0.0, 11.1, 1.0, 101.0, 0.0, 100.0}),
// scores
Tensor(ET, {1, 1, 6}, std::vector<T>{
0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),
// max_output_boxes_per_class
Tensor(ET_BOX, {}, std::vector<T_BOX>{3}),
// iou_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.5f}),
// score_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// soft_nms_sigma
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// box_encoding
op::v5::NonMaxSuppression::BoxEncodingType::CORNER,
// selected_indices
Tensor(ET_IND, {3, 3}, std::vector<T_IND>{
0, 0, 3, 0, 0, 0, 0, 0, 5}),
// selected_scores
Tensor(ET_TH, {3, 3}, std::vector<T_TH>{
0.0, 0.0, 0.95, 0.0, 0.0, 0.9, 0.0, 0.0, 0.3}),
// valid_outputs
Tensor(ET_IND, {1}, std::vector<T_IND>{3}),
"nonmaxsuppression_flipped_coordinates"),
NonMaxSuppressionParams(
// boxes
Tensor(ET, {1, 10, 4}, std::vector<T>{
0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0,
1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0,
0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0}),
// scores
Tensor(ET, {1, 1, 10}, std::vector<T>{
0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9}),
// max_output_boxes_per_class
Tensor(ET_BOX, {}, std::vector<T_BOX>{3}),
// iou_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.5f}),
// score_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// soft_nms_sigma
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// box_encoding
op::v5::NonMaxSuppression::BoxEncodingType::CORNER,
// selected_indices
Tensor(ET_IND, {1, 3}, std::vector<T_IND>{0, 0, 0}),
// selected_scores
Tensor(ET_TH, {1, 3}, std::vector<T_TH>{0.0, 0.0, 0.9}),
// valid_outputs
Tensor(ET_IND, {1}, std::vector<T_IND>{1}),
"nonmaxsuppression_identical_boxes"),
NonMaxSuppressionParams(
// boxes
Tensor(ET, {1, 6, 4}, std::vector<T>{
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),
// scores
Tensor(ET, {1, 1, 6}, std::vector<T>{
0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),
// max_output_boxes_per_class
Tensor(ET_BOX, {}, std::vector<T_BOX>{2}),
// iou_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.5f}),
// score_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// soft_nms_sigma
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// box_encoding
op::v5::NonMaxSuppression::BoxEncodingType::CORNER,
// selected_indices
Tensor(ET_IND, {2, 3}, std::vector<T_IND>{0, 0, 3, 0, 0, 0}),
// selected_scores
Tensor(ET_TH, {2, 3}, std::vector<T_TH>{
0.0, 0.0, 0.95, 0.0, 0.0, 0.9}),
// valid_outputs
Tensor(ET_IND, {1}, std::vector<T_IND>{2}),
"nonmaxsuppression_limit_output_size"),
NonMaxSuppressionParams(
// boxes
Tensor(ET, {1, 1, 4}, std::vector<T>{0.0, 0.0, 1.0, 1.0}),
// scores
Tensor(ET, {1, 1, 1}, std::vector<T>{0.9}),
// max_output_boxes_per_class
Tensor(ET_BOX, {}, std::vector<T_BOX>{3}),
// iou_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.5f}),
// score_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// soft_nms_sigma
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// box_encoding
op::v5::NonMaxSuppression::BoxEncodingType::CORNER,
// selected_indices
Tensor(ET_IND, {1, 3}, std::vector<T_IND>{0, 0, 0}),
// selected_scores
Tensor(ET_TH, {1, 3}, std::vector<T_TH>{0.0, 0.0, 0.9}),
// valid_outputs
Tensor(ET_IND, {1}, std::vector<T_IND>{1}),
"nonmaxsuppression_single_box"),
NonMaxSuppressionParams(
// boxes
Tensor(ET, {1, 6, 4}, std::vector<T>{
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),
// scores
Tensor(ET, {1, 1, 6}, std::vector<T>{
0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),
// max_output_boxes_per_class
Tensor(ET_BOX, {}, std::vector<T_BOX>{3}),
// iou_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.5f}),
// score_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// soft_nms_sigma
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// box_encoding
op::v5::NonMaxSuppression::BoxEncodingType::CORNER,
// selected_indices
Tensor(ET_IND, {3, 3}, std::vector<T_IND>{
0, 0, 3, 0, 0, 0, 0, 0, 5}),
// selected_scores
Tensor(ET_TH, {3, 3}, std::vector<T_TH>{
0.0, 0.0, 0.95, 0.0, 0.0, 0.9, 0.0, 0.0, 0.3}),
// valid_outputs
Tensor(ET_IND, {1}, std::vector<T_IND>{3}),
"nonmaxsuppression_suppress_by_IOU"),
NonMaxSuppressionParams(
// boxes
Tensor(ET, {1, 6, 4}, std::vector<T>{
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),
// scores
Tensor(ET, {1, 1, 6}, std::vector<T>{
0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),
// max_output_boxes_per_class
Tensor(ET_BOX, {}, std::vector<T_BOX>{3}),
// iou_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.5f}),
// score_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.4f}),
// soft_nms_sigma
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// box_encoding
op::v5::NonMaxSuppression::BoxEncodingType::CORNER,
// selected_indices
Tensor(ET_IND, {2, 3}, std::vector<T_IND>{
0, 0, 3, 0, 0, 0}),
// selected_scores
Tensor(ET_TH, {2, 3}, std::vector<T_TH>{
0.0, 0.0, 0.95, 0.0, 0.0, 0.9}),
// valid_outputs
Tensor(ET_IND, {1}, std::vector<T_IND>{2}),
"nonmaxsuppression_suppress_by_IOU_and_scores"),
NonMaxSuppressionParams(
// boxes
Tensor(ET, {2, 6, 4}, std::vector<T>{
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),
// scores
Tensor(ET, {2, 1, 6}, std::vector<T>{
0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),
// max_output_boxes_per_class
Tensor(ET_BOX, {}, std::vector<T_BOX>{2}),
// iou_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.5f}),
// score_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// soft_nms_sigma
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// box_encoding
op::v5::NonMaxSuppression::BoxEncodingType::CORNER,
// selected_indices
Tensor(ET_IND, {4, 3}, std::vector<T_IND>{
0, 0, 3, 0, 0, 0, 1, 0, 3, 1, 0, 0}),
// selected_scores
Tensor(ET_TH, {4, 3}, std::vector<T_TH>{
0.0, 0.0, 0.95, 0.0, 0.0, 0.9, 1.0, 0.0, 0.95, 1.0, 0.0, 0.9}),
// valid_outputs
Tensor(ET_IND, {1}, std::vector<T_IND>{4}),
"nonmaxsuppression_two_batches"),
NonMaxSuppressionParams(
// boxes
Tensor(ET, {1, 6, 4}, std::vector<T>{
0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0}),
// scores
Tensor(ET, {1, 2, 6}, std::vector<T>{
0.9, 0.75, 0.6, 0.95, 0.5, 0.3, 0.9, 0.75, 0.6, 0.95, 0.5, 0.3}),
// max_output_boxes_per_class
Tensor(ET_BOX, {}, std::vector<T_BOX>{2}),
// iou_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.5f}),
// score_threshold
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// soft_nms_sigma
Tensor(ET_TH, {}, std::vector<T_TH>{0.0f}),
// box_encoding
op::v5::NonMaxSuppression::BoxEncodingType::CORNER,
// selected_indices
Tensor(ET_IND, {4, 3}, std::vector<T_IND>{
0, 0, 3, 0, 0, 0, 0, 1, 3, 0, 1, 0}),
// selected_scores
Tensor(ET_TH, {4, 3}, std::vector<T_TH>{
0.0, 0.0, 0.95, 0.0, 0.0, 0.9, 0.0, 1.0, 0.95, 0.0, 1.0, 0.9}),
// valid_outputs
Tensor(ET_IND, {1}, std::vector<T_IND>{4}),
"nonmaxsuppression_two_classes"),
};
return params;
}

std::vector<NonMaxSuppressionParams> generateCombinedParams() {
const std::vector<std::vector<NonMaxSuppressionParams>> generatedParams {
generateParams<element::Type_t::bf16, element::Type_t::i32, element::Type_t::f32, element::Type_t::i32>(),
generateParams<element::Type_t::f16, element::Type_t::i32, element::Type_t::f32, element::Type_t::i32>(),
generateParams<element::Type_t::f32, element::Type_t::i32, element::Type_t::f32, element::Type_t::i32>(),
generateParams<element::Type_t::bf16, element::Type_t::i32, element::Type_t::f32, element::Type_t::i64>(),
generateParams<element::Type_t::f16, element::Type_t::i32, element::Type_t::f32, element::Type_t::i64>(),
generateParams<element::Type_t::f32, element::Type_t::i32, element::Type_t::f32, element::Type_t::i64>(),
};
std::vector<NonMaxSuppressionParams> combinedParams;

for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}

template <element::Type_t ET, element::Type_t ET_BOX, element::Type_t ET_TH, element::Type_t ET_IND>
std::vector<NonMaxSuppressionParams> generateParamsWithoutConstants() {
using T = typename element_type_traits<ET>::value_type;
using T_BOX = typename element_type_traits<ET_BOX>::value_type;
using T_TH = typename element_type_traits<ET_TH>::value_type;
using T_IND = typename element_type_traits<ET_IND>::value_type;
std::vector<NonMaxSuppressionParams> params {
NonMaxSuppressionParams(
// boxes
Tensor(ET, {1, 6, 4}, std::vector<T>{
0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.1f, 1.0f, 1.1f, 0.0f, -0.1f, 1.0f, 0.9f,
0.0f, 10.0f, 1.0f, 11.0f, 0.0f, 10.1f, 1.0f, 11.1f, 0.0f, 100.0f, 1.0f, 101.0f}),
// scores
Tensor(ET, {1, 1, 6}, std::vector<T>{
0.9f, 0.75f, 0.6f, 0.95f, 0.5f, 0.3f}),
// max_output_boxes_per_class
Tensor(ET_BOX, {1}, std::vector<T_BOX>{1}),
// iou_threshold
Tensor(ET_TH, {1}, std::vector<T_TH>{0.4f}),
// score_threshold
Tensor(ET_TH, {1}, std::vector<T_TH>{0.2f}),
// soft_nms_sigma
Tensor(ET_TH, {1}, std::vector<T_TH>{0.0f}),
// box_encoding
op::v5::NonMaxSuppression::BoxEncodingType::CORNER,
// selected_indices
Tensor(ET_IND, {1, 3}, std::vector<T_IND>{0, 0, 3}),
// selected_scores
Tensor(ET_TH, {1, 3}, std::vector<T_TH>{0.0f, 0.0f, 0.95f}),
// valid_outputs
Tensor(ET_IND, {1}, std::vector<T_IND>{1}),
"nonmaxsuppression_suppress_by_IOU_and_scores_without_constants"),
};
return params;
}

std::vector<NonMaxSuppressionParams> generateCombinedParamsWithoutConstants() {
const std::vector<std::vector<NonMaxSuppressionParams>> generatedParams {
generateParamsWithoutConstants<element::Type_t::bf16, element::Type_t::i32, element::Type_t::f32, element::Type_t::i32>(),
generateParamsWithoutConstants<element::Type_t::f16, element::Type_t::i32, element::Type_t::f32, element::Type_t::i32>(),
generateParamsWithoutConstants<element::Type_t::f32, element::Type_t::i32, element::Type_t::f32, element::Type_t::i32>(),
generateParamsWithoutConstants<element::Type_t::bf16, element::Type_t::i32, element::Type_t::f32, element::Type_t::i64>(),
generateParamsWithoutConstants<element::Type_t::f16, element::Type_t::i32, element::Type_t::f32, element::Type_t::i64>(),
generateParamsWithoutConstants<element::Type_t::f32, element::Type_t::i32, element::Type_t::f32, element::Type_t::i64>(),
};
std::vector<NonMaxSuppressionParams> combinedParams;

for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}

INSTANTIATE_TEST_SUITE_P(smoke_NonMaxSuppression_With_Hardcoded_Refs, ReferenceNonMaxSuppressionTest,
testing::ValuesIn(generateCombinedParams()), ReferenceNonMaxSuppressionTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_NonMaxSuppression_With_Hardcoded_Refs, ReferenceNonMaxSuppressionTestWithoutConstants,
testing::ValuesIn(generateCombinedParamsWithoutConstants()), ReferenceNonMaxSuppressionTest::getTestCaseName);
} // namespace
@@ -2,11 +2,12 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/op/prior_box.hpp"

#include <gtest/gtest.h>

#include "openvino/op/prior_box.hpp"
#include "base_reference_test.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/op/constant.hpp"

using namespace reference_tests;
using namespace ov;
@@ -17,9 +18,11 @@ struct PriorBoxParams {
PriorBoxParams(const std::vector<float>& min_size,
const std::vector<float>& aspect_ratio,
const bool scale_all_size,
const ov::Shape& layerShapeShape, const ov::Shape& imageShapeShape,
const ov::Shape& layerShapeShape,
const ov::Shape& imageShapeShape,
const ov::element::Type& iType,
const std::vector<IT>& layerShapeValues, const std::vector<IT>& imageShapeValues,
const std::vector<IT>& layerShapeValues,
const std::vector<IT>& imageShapeValues,
const std::vector<float>& oValues,
const std::string& testcaseName = "")
: layerShapeShape(layerShapeShape),
@@ -30,10 +33,10 @@ struct PriorBoxParams {
imageShapeData(CreateTensor(iType, imageShapeValues)),
refData(CreateTensor(outType, oValues)),
testcaseName(testcaseName) {
attrs.min_size = min_size;
attrs.aspect_ratio = aspect_ratio;
attrs.scale_all_sizes = scale_all_size;
}
attrs.min_size = min_size;
attrs.aspect_ratio = aspect_ratio;
attrs.scale_all_sizes = scale_all_size;
}

ov::op::v0::PriorBox::Attributes attrs;
ov::Shape layerShapeShape;
@@ -46,6 +49,46 @@ struct PriorBoxParams {
std::string testcaseName;
};

struct PriorBoxV8Params {
template <class IT>
PriorBoxV8Params(const std::vector<float>& min_size,
const std::vector<float>& max_size,
const std::vector<float>& aspect_ratio,
const bool scale_all_size,
const bool min_max_aspect_ratios_order,
const ov::Shape& layerShapeShape,
const ov::Shape& imageShapeShape,
const ov::element::Type& iType,
const std::vector<IT>& layerShapeValues,
const std::vector<IT>& imageShapeValues,
const std::vector<float>& oValues,
const std::string& testcaseName = "")
: layerShapeShape(layerShapeShape),
imageShapeShape(imageShapeShape),
inType(iType),
outType(ov::element::Type_t::f32),
layerShapeData(CreateTensor(iType, layerShapeValues)),
imageShapeData(CreateTensor(iType, imageShapeValues)),
refData(CreateTensor(outType, oValues)),
testcaseName(testcaseName) {
attrs.min_size = min_size;
attrs.max_size = max_size;
attrs.aspect_ratio = aspect_ratio;
attrs.scale_all_sizes = scale_all_size;
attrs.min_max_aspect_ratios_order = min_max_aspect_ratios_order;
}

ov::op::v8::PriorBox::Attributes attrs;
ov::Shape layerShapeShape;
ov::Shape imageShapeShape;
ov::element::Type inType;
ov::element::Type outType;
ov::runtime::Tensor layerShapeData;
ov::runtime::Tensor imageShapeData;
ov::runtime::Tensor refData;
std::string testcaseName;
};

class ReferencePriorBoxLayerTest : public testing::TestWithParam<PriorBoxParams>, public CommonReferenceTest {
public:
void SetUp() override {
@@ -68,10 +111,43 @@ public:

private:
static std::shared_ptr<Function> CreateFunction(const PriorBoxParams& params) {
auto LS = std::make_shared<opset1::Constant>(params.inType, params.layerShapeShape, params.layerShapeData.data());
auto IS = std::make_shared<opset1::Constant>(params.inType, params.imageShapeShape, params.imageShapeData.data());
auto LS =
std::make_shared<op::v0::Constant>(params.inType, params.layerShapeShape, params.layerShapeData.data());
auto IS =
std::make_shared<op::v0::Constant>(params.inType, params.imageShapeShape, params.imageShapeData.data());
const auto PriorBox = std::make_shared<op::v0::PriorBox>(LS, IS, params.attrs);
return std::make_shared<ov::Function>(NodeVector {PriorBox}, ParameterVector {});
return std::make_shared<ov::Function>(NodeVector{PriorBox}, ParameterVector{});
}
};

class ReferencePriorBoxV8LayerTest : public testing::TestWithParam<PriorBoxV8Params>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateFunction(params);
inputData = {};
refOutData = {params.refData};
}
static std::string getTestCaseName(const testing::TestParamInfo<PriorBoxV8Params>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "layerShapeShape=" << param.layerShapeShape << "_";
result << "imageShapeShape=" << param.imageShapeShape << "_";
result << "iType=" << param.inType << "_";
result << "oType=" << param.outType;
if (param.testcaseName != "")
result << "_" << param.testcaseName;
return result.str();
}

private:
static std::shared_ptr<Function> CreateFunction(const PriorBoxV8Params& params) {
auto LS =
std::make_shared<op::v0::Constant>(params.inType, params.layerShapeShape, params.layerShapeData.data());
auto IS =
std::make_shared<op::v0::Constant>(params.inType, params.imageShapeShape, params.imageShapeData.data());
const auto PriorBoxV8 = std::make_shared<op::v8::PriorBox>(LS, IS, params.attrs);
return std::make_shared<ov::Function>(NodeVector{PriorBoxV8}, ParameterVector{});
}
};

@@ -79,13 +155,20 @@ TEST_P(ReferencePriorBoxLayerTest, CompareWithRefs) {
Exec();
}

TEST_P(ReferencePriorBoxV8LayerTest, CompareWithRefs) {
Exec();
}

template <element::Type_t IN_ET>
std::vector<PriorBoxParams> generatePriorBoxFloatParams() {
using T = typename element_type_traits<IN_ET>::value_type;

std::vector<PriorBoxParams> priorBoxParams {
PriorBoxParams({2.0f}, {1.5f}, false,
{2}, {2},
std::vector<PriorBoxParams> priorBoxParams{
PriorBoxParams({2.0f},
{1.5f},
false,
{2},
{2},
IN_ET,
std::vector<T>{2, 2},
std::vector<T>{10, 10},
@@ -101,8 +184,37 @@ std::vector<PriorBoxParams> generatePriorBoxFloatParams() {
return priorBoxParams;
}

template <element::Type_t IN_ET>
std::vector<PriorBoxV8Params> generatePriorBoxV8FloatParams() {
using T = typename element_type_traits<IN_ET>::value_type;

std::vector<PriorBoxV8Params> priorBoxV8Params{
PriorBoxV8Params(
{2.0f},
{5.0f},
{1.5f},
true,
false,
{2},
{2},
IN_ET,
std::vector<T>{2, 2},
std::vector<T>{10, 10},
std::vector<float>{
0.15, 0.15, 0.35, 0.35, 0.127526, 0.16835, 0.372474, 0.33165, 0.0918861, 0.0918861, 0.408114, 0.408114,
0.65, 0.15, 0.85, 0.35, 0.627526, 0.16835, 0.872474, 0.33165, 0.591886, 0.0918861, 0.908114, 0.408114,
0.15, 0.65, 0.35, 0.85, 0.127526, 0.66835, 0.372474, 0.83165, 0.0918861, 0.591886, 0.408114, 0.908114,
0.65, 0.65, 0.85, 0.85, 0.627526, 0.66835, 0.872474, 0.83165, 0.591886, 0.591886, 0.908114, 0.908114,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1}),
};
return priorBoxV8Params;
}

std::vector<PriorBoxParams> generatePriorBoxCombinedParams() {
const std::vector<std::vector<PriorBoxParams>> priorBoxTypeParams {
const std::vector<std::vector<PriorBoxParams>> priorBoxTypeParams{
generatePriorBoxFloatParams<element::Type_t::i64>(),
generatePriorBoxFloatParams<element::Type_t::i32>(),
generatePriorBoxFloatParams<element::Type_t::i16>(),
@@ -111,7 +223,7 @@ std::vector<PriorBoxParams> generatePriorBoxCombinedParams() {
generatePriorBoxFloatParams<element::Type_t::u32>(),
generatePriorBoxFloatParams<element::Type_t::u16>(),
generatePriorBoxFloatParams<element::Type_t::u8>(),
};
};
std::vector<PriorBoxParams> combinedParams;

for (const auto& params : priorBoxTypeParams) {
@@ -120,7 +232,32 @@ std::vector<PriorBoxParams> generatePriorBoxCombinedParams() {
return combinedParams;
}

INSTANTIATE_TEST_SUITE_P(smoke_PriorBox_With_Hardcoded_Refs, ReferencePriorBoxLayerTest,
testing::ValuesIn(generatePriorBoxCombinedParams()), ReferencePriorBoxLayerTest::getTestCaseName);
} // namespace

std::vector<PriorBoxV8Params> generatePriorBoxV8CombinedParams() {
const std::vector<std::vector<PriorBoxV8Params>> priorBoxV8TypeParams{
generatePriorBoxV8FloatParams<element::Type_t::i64>(),
generatePriorBoxV8FloatParams<element::Type_t::i32>(),
generatePriorBoxV8FloatParams<element::Type_t::i16>(),
generatePriorBoxV8FloatParams<element::Type_t::i8>(),
generatePriorBoxV8FloatParams<element::Type_t::u64>(),
generatePriorBoxV8FloatParams<element::Type_t::u32>(),
generatePriorBoxV8FloatParams<element::Type_t::u16>(),
generatePriorBoxV8FloatParams<element::Type_t::u8>(),
};
std::vector<PriorBoxV8Params> combinedParams;

for (const auto& params : priorBoxV8TypeParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}

INSTANTIATE_TEST_SUITE_P(smoke_PriorBox_With_Hardcoded_Refs,
ReferencePriorBoxLayerTest,
testing::ValuesIn(generatePriorBoxCombinedParams()),
ReferencePriorBoxLayerTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_PriorBoxV8_With_Hardcoded_Refs,
ReferencePriorBoxV8LayerTest,
testing::ValuesIn(generatePriorBoxV8CombinedParams()),
ReferencePriorBoxV8LayerTest::getTestCaseName);
} // namespace
@@ -130,10 +130,10 @@ TEST_F(IEClassGetConfigTestTEMPLATE, smoke_GetConfigNoThrow) {
std::string defaultDeviceID = ie.GetConfig(deviceName, CONFIG_KEY(DEVICE_ID));
std::cout << CONFIG_KEY(DEVICE_ID) << " : " << defaultDeviceID << std::endl;
} else if (CONFIG_KEY(PERF_COUNT) == confKey) {
bool defaultPerfCount = ie.GetConfig(deviceName, CONFIG_KEY(PERF_COUNT));
bool defaultPerfCount = ie.GetConfig(deviceName, CONFIG_KEY(PERF_COUNT)).as<bool>();
std::cout << CONFIG_KEY(PERF_COUNT) << " : " << defaultPerfCount << std::endl;
} else if (CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS) == confKey) {
bool defaultExclusive = ie.GetConfig(deviceName, CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS));
bool defaultExclusive = ie.GetConfig(deviceName, CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)).as<bool>();
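// Note: the explicit .as<bool>() conversions above are presumably needed because
// the value returned by GetConfig no longer converts to bool implicitly; treat
// this as an assumption - the change itself only shows the added conversions.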
std::cout << CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS) << " : " << defaultExclusive << std::endl;
}
}
@@ -100,6 +100,12 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*ReferenceTopKTest.*aType=f64.*)",
// CVS-63947
R"(.*ReferenceConcatTest.*concat_zero_.*)",
// CVS-64119
R"(.*ReferenceMatrixNmsTest.*esiType=i64.*evoType=i64.*)",
// CVS-64121
R"(.*ReferenceMulticlassNmsTest.*esiType=i64.*evoType=i64.*)",
// CVS-64096
R"(.*ReferenceNonMaxSuppressionTest.*esiType=i32.*evoType=i32.*)",
};

#ifdef _WIN32
@@ -4,10 +4,6 @@

project(InferenceEngine)

add_subdirectory(thirdparty)
add_subdirectory(src)
add_subdirectory(ie_bridges/c)

if(ENABLE_PYTHON)
add_subdirectory(ie_bridges/python)
endif()
@@ -230,10 +230,7 @@ function( compile_pyx _name generated_file )
set( no_docstrings_arg "--no-docstrings" )
endif()

if( "${CMAKE_BUILD_TYPE}" STREQUAL "Debug" OR
"${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo" )
set( cython_debug_arg "--gdb" )
endif()
set( cython_debug_arg "$<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:--gdb>" )
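# Note: the generator expression is evaluated per configuration at generate time,
# so unlike the CMAKE_BUILD_TYPE string check above it also behaves correctly with
# multi-config generators (e.g. Visual Studio, Xcode), where CMAKE_BUILD_TYPE is
# typically empty.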

if( "${PYTHONLIBS_VERSION_STRING}" MATCHES "^3." )
set( version_arg "-3" )
@@ -47,7 +47,7 @@ endif()
# create target for openvino.wheel

set(openvino_wheel_deps ie_api offline_transformations_api)
foreach(_target ie_libraries ie_plugins _pyngraph pyopenvino)
foreach(_target ov_runtime_libraries ie_plugins _pyngraph pyopenvino)
if(TARGET ${_target})
list(APPEND openvino_wheel_deps ${_target})
endif()
@@ -7,14 +7,8 @@ if(CMAKE_COMPILER_IS_GNUCXX)
ie_add_compiler_flags(-Wmissing-declarations)
endif()

add_subdirectory(transformations)

add_subdirectory(low_precision_transformations)

add_subdirectory(offline_transformations)

add_subdirectory(snippets)

if(ENABLE_MKL_DNN)
add_subdirectory(mkldnn_plugin)
endif()
@@ -31,10 +25,6 @@ if(ENABLE_GNA)
add_subdirectory(gna_plugin)
endif()

if(ENABLE_HETERO)
add_subdirectory(hetero_plugin)
endif()

if(ENABLE_MULTI)
add_subdirectory(multi_device)
endif()
@@ -43,42 +33,3 @@ if(ENABLE_BATCH)
add_subdirectory(auto_batch)
endif()


add_subdirectory(inference_engine)

add_subdirectory(legacy_api)

add_subdirectory(readers)

add_subdirectory(preprocessing)

# add a custom target to build all Inference Engine Core libraries

add_custom_target(ie_libraries ALL
DEPENDS inference_engine_transformations inference_engine_legacy
inference_engine inference_engine_preproc
inference_engine_lp_transformations inference_engine_snippets)

if(ENABLE_IR_V7_READER)
add_dependencies(ie_libraries inference_engine_ir_v7_reader)
endif()

if(NGRAPH_IR_FRONTEND_ENABLE)
if(BUILD_SHARED_LIBS)
add_dependencies(ie_libraries ir_ov_frontend)
endif()
# use this one once CVS-69781 is fixed
# add_dependencies(inference_engine ir_ov_frontend)
endif()

if(NGRAPH_ONNX_FRONTEND_ENABLE)
add_dependencies(inference_engine onnx_ov_frontend)
endif()

if(NGRAPH_PDPD_FRONTEND_ENABLE)
add_dependencies(inference_engine paddlepaddle_ov_frontend)
endif()

if(NGRAPH_TF_FRONTEND_ENABLE)
add_dependencies(inference_engine tensorflow_ov_frontend)
endif()
@@ -26,9 +26,8 @@ ie_add_plugin(NAME ${TARGET_NAME}
SOURCES ${MAIN_SRC} ${LIBRARY_HEADERS}
VERSION_DEFINES_FOR cldnn_engine.cpp)

if(CMAKE_BUILD_TYPE STREQUAL "Release")
target_compile_options(${TARGET_NAME} PRIVATE -Os)
endif()
target_compile_options(${TARGET_NAME} PRIVATE
$<$<CONFIG:Release>:$<IF:$<CXX_COMPILER_ID:MSVC>,/Os,-Os>>)
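# Note: $<IF:$<CXX_COMPILER_ID:MSVC>,/Os,-Os> selects the compiler-appropriate
# size-optimization flag (/Os for MSVC, -Os for GCC/Clang), and the enclosing
# $<CONFIG:Release> limits it to Release builds - matching the removed
# CMAKE_BUILD_TYPE check while staying correct for multi-config generators.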

target_link_libraries(${TARGET_NAME} PRIVATE clDNN_lib pugixml::static
inference_engine_transformations
@@ -84,8 +84,6 @@ void CLDNNGraph::Build() {
m_networks.emplace_back(network);
}

UpdateImplementationsMap();

GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(!debug_config->dry_run_path.empty()) {
CNNNetwork net(GetExecGraphInfo());
@@ -545,49 +543,6 @@ bool CLDNNGraph::IsLoaded() const {
return GetNetwork() != nullptr;
}

void CLDNNGraph::UpdateImplementationsMap() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdateImplementationsMap");
if (m_config.useProfiling) {
auto extractImplementationFromInfo = [](const std::string& info) -> std::string {
std::string def_implementation = "undef";
std::string impl_section = "implementation :";
std::string::size_type pos = info.find(impl_section);
if (pos == std::string::npos) {
return def_implementation;
}

std::string::size_type end_pos = info.find(',', pos);
if (end_pos == std::string::npos) {
return def_implementation;
}

std::string::size_type length = end_pos - pos - impl_section.size();

auto trim = [](const std::string& str) {
size_t first = str.find_first_not_of(' ');
if (std::string::npos == first) {
return str;
}
size_t last = str.find_last_not_of(' ');
return str.substr(first, (last - first + 1));
};
std::string tmp = trim(info.substr(pos + impl_section.size(), length));

return tmp.length() > 1 ? tmp : def_implementation;
};

// Parse primitive info and extract implementation name.
for (auto& id : profilingIDs) {
std::string prim_info = "";
try {
prim_info = GetNetwork()->get_primitive_info(id);
} catch (std::exception& /*e*/) { }

implementationsMap.insert({id, extractImplementationFromInfo(prim_info)});
}
}
}

std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::GetPerformanceCounts() const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::GetPerformanceCounts");
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> result;
@@ -632,7 +587,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
static const std::string cpuExecType("CPU");
cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length()); // Override execType as CPU
} else {
std::string impl = implementationsMap.at(primId);
std::string impl = GetNetwork()->get_implementation_info(primId);
impl.copy(extPerfEntry.exec_type, impl.length());
}

@@ -659,8 +614,10 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
allIds.erase(std::find(allIds.begin(), allIds.end(), id));
}
}
if (!kernelId.empty())
implementationsMap.at(kernelId).copy(extPerfEntry.exec_type, implementationsMap.at(kernelId).length());
if (!kernelId.empty()) {
std::string impl_info = GetNetwork()->get_implementation_info(kernelId);
std::memcpy(extPerfEntry.exec_type, &impl_info[0], impl_info.length());
}
}

getUpperCaseName(perfCounter.layerType).copy(extPerfEntry.layer_type, perfCounter.layerType.length());
@@ -88,7 +88,6 @@ protected:
std::map<std::string, std::vector<cldnn::primitive_id>> prevPrimitiveIDs;

std::map<cldnn::primitive_id, std::pair<std::string, PerfCounter>> perfMap;
std::map<cldnn::primitive_id, std::string> implementationsMap;
std::vector<cldnn::primitive_id> profilingIDs;

std::map<std::string, InferenceEngine::SizeVector> outputDims;
@@ -99,7 +98,6 @@ protected:
std::shared_ptr<cldnn::network> BuildNetwork(std::shared_ptr<cldnn::program> program);
void Build();
void UpdateLayersMaps();
void UpdateImplementationsMap();
std::shared_ptr<ngraph::Function> GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& pi,
bool filter_const_primitives = true);
};
@@ -35,6 +35,9 @@
#include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include <transformations/common_optimizations/wrap_interpolate_into_transposes.hpp>
#include <transformations/common_optimizations/transpose_sinking.hpp>

#include <transformations/op_conversions/convert_depth_to_space.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp>
#include <transformations/op_conversions/convert_gelu.hpp>
@@ -120,6 +123,8 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Function> func) {

manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::CommonOptimizations>();
manager.register_pass<ngraph::pass::WrapInterpolateIntoTransposes>();
manager.register_pass<ngraph::pass::TransposeSinking>();

if (!config.enable_loop_unrolling) {
manager.register_pass<ngraph::pass::BidirectionalLSTMSequenceDecomposition>();
@@ -22,35 +22,6 @@

namespace CLDNNPlugin {

struct ConstProperties {
bool isWeights;
bool hasGroupDimension;
bool reversedChannelsOrder;
};

static ConstProperties getConstProperties(const std::shared_ptr<ngraph::op::Constant>& op) {
for (size_t i = 0; i < op->get_output_size(); i++) {
auto outTensors = op->get_output_target_inputs(i);
for (auto& t : outTensors) {
auto outOp = t.get_node();
if (dynamic_cast<ngraph::op::v1::Convolution*>(outOp)) {
return {t.get_index() == 1, false, false};
} else if (dynamic_cast<ngraph::op::v1::BinaryConvolution*>(outOp)) {
return {t.get_index() == 1, false, false};
} else if (auto castedOp = dynamic_cast<ngraph::op::v1::DeformableConvolution*>(outOp)) {
return {t.get_index() == 2, castedOp->get_group() > 1, false};
} else if (dynamic_cast<ngraph::op::v1::GroupConvolution*>(outOp)) {
return {t.get_index() == 1, true, false};
} else if (dynamic_cast<ngraph::op::v1::ConvolutionBackpropData*>(outOp)) {
return {t.get_index() == 1, false, true};
} else if (dynamic_cast<ngraph::op::v1::GroupConvolutionBackpropData*>(outOp)) {
return {t.get_index() == 1, true, true};
}
}
}
return {false, false, false};
}

static cldnn::tensor getConstTensor(const ngraph::Shape constDims) {
cldnn::tensor constTensor;
switch (constDims.size()) {
@@ -78,71 +49,103 @@ static cldnn::tensor getConstTensor(const ngraph::Shape constDims) {
return constTensor;
}

struct ConstProperties {
bool needsBatchInterpretation;
bool swapOI;
bool hasGroupDimension;
};

static void createClDnnConstant(Program& p, const ngraph::Shape& constDims, const std::shared_ptr<ngraph::op::v0::Constant>& op, const ConstProperties& props);

static void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant>& op) {
auto constDims = op->get_shape();
cldnn::tensor constTensor = getConstTensor(constDims);
const auto& constDims = op->get_shape();
auto constUsers = op->get_output_target_inputs(0);
size_t numConstUsers = constUsers.size();

std::unordered_map<std::shared_ptr<ngraph::op::v0::Constant>, ConstProperties> consts = {
{op, {false, false, false}}
};

// handleConvWeights is executed when one of the constant's users is ConvolutionBackpropData or GroupConvolutionBackpropData.
// In that case, we mark that the constant's O and I dimensions need to be swapped.
auto handleConvWeights = [&op] (ngraph::Node* conv, std::unordered_map<std::shared_ptr<ngraph::op::v0::Constant>, ConstProperties>& consts,
size_t& numConstUsers, bool hasGroupDimension) {
// If constant has multiple users - create its copy and replace 'conv' weights with the copy.
// This is to make sure that dimension change doesn't break other users of the constant node.
// It is a shallow copy, but that's fine since in createClDnnConstant
// every constant created here, gets memcopied to a brand new cldnn::memory.
if (numConstUsers > 1) {
auto constant = std::make_shared<ngraph::op::v0::Constant>(*(op.get()));
conv->input(1).replace_source_output(constant);
consts.insert({constant, {false, true, hasGroupDimension}});
numConstUsers--;
} else {
consts[op].swapOI = true;
consts[op].hasGroupDimension = hasGroupDimension;
}
};
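// Illustrative sketch (shapes assumed, not part of the change): deconvolution
// weights arrive as [O, I, spatial...] or [G, O, I, spatial...], e.g. {64, 3, 3, 3};
// marking swapOI makes createClDnnConstant exchange the O and I axes, e.g. to
// {3, 64, 3, 3}, which is the order expected for backprop convolution weights.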

// WA to inconsistency between input and const 1d tensors
// For Concat along batch we go with batch interpretation
// For Gather input we go with batch interpretation
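// Example: a 1d constant of shape {6} feeding Concat(axis=0) is laid out as
// batch=6, feature=1 instead of feature=6 - see the needsBatchInterpretation
// handling in createClDnnConstant (batch[0] = count(), feature[0] = 1).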
bool needsBatchInterpretation = false;
if (constDims.size() == 1) {
for (size_t i = 0; i < op->get_output_size(); i++) {
auto outTensors = op->get_output_target_inputs(i);

for (auto& t : outTensors) {
auto outOp = t.get_node();
if (auto castedOp = dynamic_cast<ngraph::op::v0::Concat*>(outOp)) {
if (castedOp->get_axis() == 0) {
needsBatchInterpretation = true;
break;
}
} else if (ngraph::op::is_binary_elementwise_arithmetic(outOp) ||
ngraph::op::is_binary_elementwise_logical(outOp) ||
ngraph::is_type<ngraph::op::v0::SquaredDifference>(outOp)) {
bool all_inputs_1d = true;
for (size_t j = 0; j < outOp->get_input_size(); j++) {
auto& in_shape = outOp->get_input_shape(j);
if (in_shape.size() > 1)
all_inputs_1d = false;
}
needsBatchInterpretation = all_inputs_1d;
break;
} else if (ngraph::is_type<ngraph::op::v1::Gather>(outOp) ||
ngraph::is_type<ngraph::op::v1::Split>(outOp) ||
ngraph::is_type<ngraph::op::v1::VariadicSplit>(outOp)) {
needsBatchInterpretation = true;
break;
}
// Also check if constant users is a backprop convolution - in that case O and I need to be swapped.
for (auto& node : constUsers) {
auto outOp = node.get_node();
if (auto castedOp = dynamic_cast<ngraph::op::v0::Concat*>(outOp)) {
if (castedOp->get_axis() == 0) {
consts[op].needsBatchInterpretation = constDims.size() == 1;
}
} else if (ngraph::op::is_binary_elementwise_arithmetic(outOp) ||
ngraph::op::is_binary_elementwise_logical(outOp) ||
ngraph::is_type<ngraph::op::v0::SquaredDifference>(outOp)) {
bool all_inputs_1d = true;
for (size_t j = 0; j < outOp->get_input_size(); j++) {
auto& in_shape = outOp->get_input_shape(j);
if (in_shape.size() > 1)
all_inputs_1d = false;
}
consts[op].needsBatchInterpretation = all_inputs_1d && constDims.size() == 1;
} else if (ngraph::is_type<ngraph::op::v1::Gather>(outOp) ||
ngraph::is_type<ngraph::op::v1::Split>(outOp) ||
ngraph::is_type<ngraph::op::v1::VariadicSplit>(outOp)) {
consts[op].needsBatchInterpretation = constDims.size() == 1;
} else if (ngraph::is_type<ngraph::op::v1::ConvolutionBackpropData>(outOp) && node.get_index() == 1) {
handleConvWeights(outOp, consts, numConstUsers, false);
} else if (ngraph::is_type<ngraph::op::v1::GroupConvolutionBackpropData>(outOp) && node.get_index() == 1) {
handleConvWeights(outOp, consts, numConstUsers, true);
}
}

if (needsBatchInterpretation) {
for (auto& it : consts) {
createClDnnConstant(p, constDims, it.first, it.second);
}
}

void createClDnnConstant(Program& p, const ngraph::Shape& constDims, const std::shared_ptr<ngraph::op::v0::Constant>& op, const ConstProperties& props) {
cldnn::tensor constTensor = getConstTensor(constDims);
auto constFormat = DefaultFormatForDims(constDims.size());

if (props.needsBatchInterpretation) {
constTensor.batch[0] = constTensor.count();
constTensor.feature[0] = 1;
}

auto constFormat = DefaultFormatForDims(op->get_output_shape(0).size());
auto prop = getConstProperties(op);

// If constDims has a dimension = 0, then create tensor with single value
// TODO: check if dim=0 is a valid case
if (std::accumulate(constDims.begin(), constDims.end(), 1, std::multiplies<size_t>()) == 0)
constTensor = cldnn::tensor{1};

// Swap O and I dimensions to match expected deconvolution weights format
bool swap_oi = prop.isWeights && prop.reversedChannelsOrder;
size_t inputFeatureElements = 1;
size_t outputFeatureElements = 1;
size_t groups = 1;
if (swap_oi) {
size_t expected_min_rank = 2 + (prop.hasGroupDimension ? 1 : 0);
auto newDims = constDims;
if (props.swapOI) {
size_t expected_min_rank = 2 + (props.hasGroupDimension ? 1 : 0);
if (expected_min_rank > constDims.size())
IE_THROW() << "Invalid constant properties or shape";

auto newDims = constDims;
if (prop.hasGroupDimension) {
if (props.hasGroupDimension) {
std::swap(newDims[2], newDims[1]);
inputFeatureElements = newDims[2];
outputFeatureElements = newDims[1];
@@ -164,8 +167,7 @@ static void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::C
cldnn::primitive_id constPrimID;
auto data = op->get_data_ptr<char>();


auto bufIter = p.blobMemCache.find(std::make_pair(data, constDims));
auto bufIter = p.blobMemCache.find(std::make_pair(data, newDims));
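// Note: the cache key switches from constDims to newDims (the possibly
// O/I-swapped shape), so the same raw data cached under a different dimension
// order is not wrongly reused for a swapped constant.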

if (bufIter != p.blobMemCache.end()) {
constPrimID = bufIter->second;
@@ -181,9 +183,9 @@ static void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::C
auto bufSize = constLayout.bytes_count();

// Do actual weights reorder and change O and I channels order
if (swap_oi) {
if (props.swapOI) {
auto elementSize = cldnn::data_type_traits::size_of(constLayout.data_type);
size_t spatial_dim_off = prop.hasGroupDimension ? 3 : 2;
size_t spatial_dim_off = props.hasGroupDimension ? 3 : 2;
size_t featureSize = elementSize;
for (size_t i = spatial_dim_off; i < constDims.size(); i++) {
featureSize *= constDims[i];
@@ -205,7 +207,7 @@ static void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::C
std::memcpy(&buf[0], &data[0], bufSize);
}
p.AddPrimitive(cldnn::data(initialconstPrimID, mem, op->get_friendly_name()));
p.blobMemCache[std::make_pair(data, constDims)] = initialconstPrimID;
p.blobMemCache[std::make_pair(data, newDims)] = initialconstPrimID;
constPrimID = initialconstPrimID;
}

@@ -39,19 +39,19 @@ static ConvoltuionParameters GetConvolutionParameters(const ngraph::CoordinateDi
switch (strides.size()) {
case 3: {
stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[2], strides[1], strides[0]));
padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pads_begin[2], pads_begin[1], pads_begin[0]));
padding = cldnn::tensor({0, 0, TensorValue(pads_begin[2]), TensorValue(pads_begin[1]), TensorValue(pads_begin[0])}, 0);
dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[2], dilations[1], dilations[0]));
break;
}
case 2: {
stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[1], strides[0], 1));
padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pads_begin[1], pads_begin[0], 0));
padding = cldnn::tensor({0, 0, TensorValue(pads_begin[1]), TensorValue(pads_begin[0])}, 0);
dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[1], dilations[0], 1));
break;
}
case 1: {
stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[0], 1, 1));
padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(pads_begin[0], 0, 0));
padding = cldnn::tensor({0, 0, TensorValue(pads_begin[0]), 0}, 0);
dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[0], 1, 1));
break;
}
@@ -17,6 +17,7 @@ static void CreateGatherNDOp(Program& p, const std::shared_ptr<ngraph::op::v5::G
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);

int32_t input_rank = static_cast<int32_t>(op->get_input_shape(0).size());
int32_t indices_rank = static_cast<int32_t>(op->get_input_shape(1).size());

auto batch_dims = op->get_batch_dims();
@@ -24,6 +25,7 @@ static void CreateGatherNDOp(Program& p, const std::shared_ptr<ngraph::op::v5::G
auto primitive = cldnn::gather_nd(layerName,
inputPrimitives[0],
inputPrimitives[1],
input_rank,
indices_rank,
batch_dims,
true,
@@ -40,6 +42,7 @@ static void CreateGatherNDOp(Program& p, const std::shared_ptr<ngraph::op::v8::G
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);

int32_t input_rank = static_cast<int32_t>(op->get_input_shape(0).size());
int32_t indices_rank = static_cast<int32_t>(op->get_input_shape(1).size());

auto batch_dims = op->get_batch_dims();
@@ -47,6 +50,7 @@ static void CreateGatherNDOp(Program& p, const std::shared_ptr<ngraph::op::v8::G
auto primitive = cldnn::gather_nd(layerName,
inputPrimitives[0],
inputPrimitives[1],
input_rank,
indices_rank,
batch_dims,
false,
@@ -217,7 +217,7 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v
auto cropPrim = cldnn::crop(layerName, inPrimitive, refSize, offSize, op->get_friendly_name());
p.AddPrimitive(cropPrim);
p.AddPrimitiveToProfiler(layerName, op);
auto last_layer_primitive = layerName;

// Reshape in case of deleting of axis
if (!shrink_axis_mask.empty()) {
@@ -226,7 +226,9 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v
auto reshapePrim = cldnn::reshape(reshapeOutName, layerName, targetShape, op->get_friendly_name());
p.AddPrimitive(reshapePrim);
p.AddInnerPrimitiveToProfiler(reshapeOutName, layerName, op);
last_layer_primitive = reshapeOutName;
}
p.AddPrimitiveToProfiler(op, last_layer_primitive);
return;
} while (false);

@@ -13,8 +13,62 @@
namespace GNAPluginNS {

/**
* @brief searches for a pattern: Permute(0,3,1,2) -> ... -> Convolution -> ... -> Permute(0,2,3,1) or
* Reshape -> ... -> Convolution -> ... -> Permute(0,2,3,1) if Convolution has only one input dimension not equal to 1
* @brief checks if it's a reshape from 4d to 3d tensor inserted after convolution
* @param layer Non-functional layer
*/
inline bool IsReshapeFrom4dTo3d(InferenceEngine::CNNLayerPtr layer) {
if (!LayerInfo(layer).isNonFunctional()) {
return false;
}

auto input_dims = layer->insData[0].lock()->getDims();
auto output_dims = layer->outData[0]->getDims();
if (input_dims.size() != 4 || output_dims.size() != 3) {
return false;
}
// If the H input dimension is not 1, it can't simply be dropped during the reshape to 3d
size_t h_dim = input_dims[2];
if (h_dim != 1) {
return false;
}
|
||||
|
||||
input_dims.erase(std::begin(input_dims) + 2);
|
||||
if (input_dims != output_dims) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief checks if it's a reshape from 3d to 4d tensor inserted before convolution
|
||||
* @param layer Non-functional layer
|
||||
*/
|
||||
inline bool IsReshapeFrom3dTo4d(InferenceEngine::CNNLayerPtr layer) {
|
||||
if (!LayerInfo(layer).isNonFunctional()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto input_dims = layer->insData[0].lock()->getDims();
|
||||
auto output_dims = layer->outData[0]->getDims();
|
||||
if (input_dims.size() != 3 || output_dims.size() != 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
input_dims.insert(std::begin(input_dims) + 2, 1);
|
||||
if (input_dims != output_dims) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
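A minimal standalone sketch of the 4d->3d check above (the dim names N, C, H, W, the helper name isSqueezedHDim, and the sample shapes are assumptions for illustration): the reshape qualifies only when H is 1 and dropping it yields exactly the output dims.

#include <cstddef>
#include <iostream>
#include <vector>

static bool isSqueezedHDim(std::vector<size_t> in, const std::vector<size_t>& out) {
    if (in.size() != 4 || out.size() != 3 || in[2] != 1)  // H (index 2) must be 1
        return false;
    in.erase(in.begin() + 2);                             // drop H
    return in == out;                                     // remaining dims must match exactly
}

int main() {
    std::cout << isSqueezedHDim({1, 64, 1, 128}, {1, 64, 128}) << "\n";  // 1: H == 1 is dropped
    std::cout << isSqueezedHDim({1, 64, 2, 128}, {1, 64, 256}) << "\n";  // 0: H != 1
}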

/**
* @brief searches for a pattern: Permute(NHWC->NCHW) -> ... -> Convolution -> ... -> Permute(NCHW->NHWC) or
* Reshape(NHWC->NCHW) -> ... -> Convolution -> ... -> Reshape(NCHW->NHWC) if Convolution has only one input/output
* dimension not equal to 1.
* If the original convolution layout is 3d, 3d->4d/4d->3d reshapes will be inserted before/after the convolution,
* so the possible patterns will be:
* Permute(NWC->NCW) -> ... -> Reshape(NCW->NCHW) -> Convolution -> Reshape(NCHW->NCW) -> ... -> Permute(NCW->NWC) or
* Reshape(NWC->NCW) -> ... -> Reshape(NCW->NCHW) -> Convolution -> Reshape(NCHW->NCW) -> ... -> Reshape(NCW->NWC)
* if Convolution has only one input/output dimension not equal to 1.
* @param layer convolution layer
* @return the found permutations before and after convolution
*/
@ -35,8 +89,10 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin

auto next = getInputTo(layer->outData.front()).begin()->second;
// Permute is inserted before Reshape by MO in NHWC models, so we need to find either permute, or reshape, or output
while (!LayerInfo(next).isPermute() && !LayerInfo(next).isNonFunctional() && !LayerInfo(next).isOutput() &&
next->outData.size() == 1) {
while (!LayerInfo(next).isPermute() && !LayerInfo(next).isOutput() && next->outData.size() == 1) {
if (LayerInfo(next).isNonFunctional() && !IsReshapeFrom4dTo3d(next) && !IsReshapeFrom3dTo4d(next)) {
break;
}
auto input_to = getInputTo(next->outData.front());
if (input_to.size() != 1) break;
next = input_to.begin()->second;
@ -44,8 +100,11 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin

// Check if the found layer is an NCHW to NHWC permute or has 1D data; if it's not, just skip this convolution
if (LayerInfo(next).isPermute()) {
if (next->outData[0]->getLayout() != InferenceEngine::Layout::NCHW ||
next->GetParamAsInts("order") != GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)) {
const auto layout = next->outData[0]->getLayout();
const auto order = next->GetParamAsInts("order");
if (layout != InferenceEngine::Layout::NCHW && layout != InferenceEngine::Layout::CHW ||
order != GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC) &&
order != std::vector<int32_t>{0, 2, 1} /* NCW to NWC */) {
return std::make_pair(nullptr, nullptr);
}
} else if (LayerInfo(next).isReshape()) {
@ -54,9 +113,11 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
}
// Check if reshape is expected for this pattern:
// the next layer has both height and width dimensions > 1
if (next->outData[0]->getDims().size() != 4 ||
GetDataDimSize(next->insData[0].lock(), InferenceEngine::DataDimName::H) != 1 ||
GetDataDimSize(next->insData[0].lock(), InferenceEngine::DataDimName::W) != 1) {
auto in_dim_size = next->insData[0].lock()->getDims().size();
IE_ASSERT(in_dim_size == 3 || in_dim_size == 4);
size_t height = in_dim_size == 3 ? 1 : GetDataDimSize(next->insData[0].lock(), InferenceEngine::DataDimName::H);
size_t width = GetDataDimSize(next->insData[0].lock(), InferenceEngine::DataDimName::W);
if (next->outData[0]->getDims().size() < 3 || height != 1 || width != 1) {
return std::make_pair(nullptr, nullptr);
}
} else {
@ -66,14 +127,19 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
// Permute is inserted after Reshape by MO in NHWC models, so we need to find either permute, or reshape, or input
auto parent = InferenceEngine::CNNNetPrevLayer(layer);
auto prev = parent;
while (!LayerInfo(prev).isPermute() && !LayerInfo(prev).isNonFunctional() && !LayerInfo(prev).isInput() &&
InferenceEngine::CNNNetHasPrevLayer(prev.get())) {
while (!LayerInfo(prev).isPermute() && !LayerInfo(prev).isInput() && InferenceEngine::CNNNetHasPrevLayer(prev.get())) {
if (LayerInfo(prev).isNonFunctional() && !IsReshapeFrom4dTo3d(prev) && !IsReshapeFrom3dTo4d(prev)) {
break;
}
prev = InferenceEngine::CNNNetPrevLayer(prev);
}
// Check if the found layer is an NHWC to NCHW permute or has 1D data; if it's not, just skip this convolution
if (LayerInfo(prev).isPermute()) {
if (prev->outData[0]->getLayout() != InferenceEngine::Layout::NCHW ||
prev->GetParamAsInts("order") != GetPermuteOrder(InferenceEngine::Layout::NHWC, InferenceEngine::Layout::NCHW)) {
const auto layout = prev->outData[0]->getLayout();
const auto order = prev->GetParamAsInts("order");
if (layout != InferenceEngine::Layout::NCHW && layout != InferenceEngine::Layout::CHW ||
order != GetPermuteOrder(InferenceEngine::Layout::NHWC, InferenceEngine::Layout::NCHW) &&
order != std::vector<int32_t>{0, 2, 1} /* NWC to NCW */) {
return std::make_pair(nullptr, nullptr);
}
} else if (LayerInfo(prev).isReshape()) {
@ -82,10 +148,12 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
}
// Check if reshape is expected for this pattern:
// the previous layer has number of channels > 1 and one of height/width dimensions is also > 1
if (parent->insData[0].lock()->getDims().size() != 4 ||
GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::C) != 1 &&
(GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::H) != 1 ||
GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::W) != 1)) {
size_t out_dims_size = parent->outData[0]->getDims().size();
IE_ASSERT(out_dims_size == 3 || out_dims_size == 4);
size_t channels = GetDataDimSize(parent->outData[0], out_dims_size - 1);
size_t height = out_dims_size == 3 ? 1 : GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::H);
size_t width = GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::W);
if (parent->insData[0].lock()->getDims().size() < 3 || channels != 1 && (height != 1 || width != 1)) {
return std::make_pair(nullptr, nullptr);
}
} else {
@ -648,27 +648,39 @@ void RemovePermutationsNHWCToNCHWPass::run() {
auto pattern_start = layers.first;
auto pattern_end = layers.second;

auto setNHWCOrder = [](InferenceEngine::DataPtr data) {
if (data->getLayout() == Layout::NHWC) return;
auto getTransposedLayout = [](InferenceEngine::DataPtr data) {
size_t dims_size = data->getDims().size();
if (dims_size < 3 || dims_size > 4) {
THROW_GNA_EXCEPTION << data->getName() <<
" unexpected dimensions size in Permute - Conv - Permute pattern";
}
return dims_size == 4 ? Layout::NHWC : Layout::HWC;
};

auto setTransposedOrder = [getTransposedLayout](InferenceEngine::DataPtr data) {
auto layout = getTransposedLayout(data);
if (data->getLayout() == layout) return;

auto dims = data->getDims();
auto order = GetPermuteOrder(Layout::NCHW, Layout::NHWC);
auto order = dims.size() == 4 ? GetPermuteOrder(Layout::NCHW, Layout::NHWC) :
std::vector<int32_t>{0, 2, 1};
InferenceEngine::SizeVector new_dims;
for (int i = 0; i < dims.size(); ++i) {
new_dims.push_back(dims[order[i]]);
}
data->setDims(new_dims);
data->setLayout(Layout::NHWC);
data->setLayout(layout);
};

auto input_to = getInputTo(pattern_start->outData[0]);
IE_ASSERT(!input_to.empty());
auto current_layer = input_to.begin()->second;
setNHWCOrder(current_layer->input());
setTransposedOrder(current_layer->input());
std::function<void(CNNLayerPtr)> propogateNHWCOrderRecursive =
[pattern_end, &propogateNHWCOrderRecursive, &setNHWCOrder](CNNLayerPtr current_layer) {
[pattern_end, &propogateNHWCOrderRecursive, &setTransposedOrder](CNNLayerPtr current_layer) {
if (current_layer == pattern_end) return;
for (size_t i = 0; i < current_layer->outData.size(); ++i) {
setNHWCOrder(current_layer->outData[i]);
setTransposedOrder(current_layer->outData[i]);
auto input_to = getInputTo(current_layer->outData[i]);
IE_ASSERT(!input_to.empty());
propogateNHWCOrderRecursive(input_to.begin()->second);
@ -682,7 +694,7 @@ void RemovePermutationsNHWCToNCHWPass::run() {
for (auto before_output : layer_before_permute->outData) {
if (areEqualDatas(pattern_start->input(), before_output)) {
output = before_output;
output->setLayout(Layout::NHWC);
output->setLayout(getTransposedLayout(output));
break;
}
}
@ -693,7 +705,7 @@ void RemovePermutationsNHWCToNCHWPass::run() {

if (!pattern_end->outData.empty() && !getInputTo(pattern_end->outData.front()).empty()) {
auto layer_after_permute = getInputTo(pattern_end->outData.front()).begin()->second;
layer_after_permute->input()->setLayout(Layout::NHWC);
layer_after_permute->input()->setLayout(getTransposedLayout(layer_after_permute->input()));
}
}
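A short sketch of the dim reordering performed by setTransposedOrder above (applyOrder is a hypothetical helper and the sample dims are assumed): the 3d branch uses order {0, 2, 1}, i.e. NCW -> NWC.

#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<size_t> applyOrder(const std::vector<size_t>& dims,
                                      const std::vector<int32_t>& order) {
    std::vector<size_t> out;
    out.reserve(dims.size());
    for (auto idx : order)
        out.push_back(dims[idx]);  // place source dim idx at the next position
    return out;
}

int main() {
    assert((applyOrder({1, 8, 10}, {0, 2, 1}) == std::vector<size_t>{1, 10, 8}));
}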

@ -1,81 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

/**
* @brief a header file for ExecutableNetwork
* @file hetero_executable_network.hpp
*/
#pragma once

#include <memory>
#include <string>
#include <vector>
#include <map>
#include <unordered_map>
#include <unordered_set>

#include <ie_common.h>
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>

#include "hetero_infer_request.hpp"
#include "ie_icore.hpp"
#include "hetero_async_infer_request.hpp"

namespace HeteroPlugin {

class Engine;

/**
* @class ExecutableNetwork
* @brief Interface of executable network
*/
class HeteroExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
public:
typedef std::shared_ptr<HeteroExecutableNetwork> Ptr;

/**
* @brief constructor
*/
HeteroExecutableNetwork(const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config,
Engine* plugin);
/**
* @brief Import from opened file constructor
*/
HeteroExecutableNetwork(std::istream& heteroModel,
const std::map<std::string, std::string>& config,
Engine* plugin);

InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs) override;


InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;

InferenceEngine::Parameter GetConfig(const std::string &name) const override;

InferenceEngine::Parameter GetMetric(const std::string &name) const override;

void Export(std::ostream& modelFile) override;

private:
void InitCNNImpl(const InferenceEngine::CNNNetwork& network);
void InitNgraph(const InferenceEngine::CNNNetwork& network);

struct NetworkDesc {
std::string _device;
InferenceEngine::CNNNetwork _clonedNetwork;
InferenceEngine::SoExecutableNetworkInternal _network;
};

std::vector<NetworkDesc> _networks;
Engine* _heteroPlugin;
std::string _name;
std::map<std::string, std::string> _config;
std::unordered_map<std::string, std::string> _blobNameMap;
};

} // namespace HeteroPlugin
@ -1,360 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

/**
* @brief A header file for the Parameter class
* @file openvino/runtime/parameter.hpp
*/
#pragma once

#include <algorithm>
#include <cctype>
#include <iterator>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <typeinfo>
#include <utility>
#include <vector>

#include "openvino/core/except.hpp"
#include "openvino/runtime/common.hpp"

namespace ov {
namespace runtime {

/**
* @brief This class represents an object to work with different parameters
*
*/
class OPENVINO_RUNTIME_API Parameter {
public:
/**
* @brief Default constructor
*/
Parameter() = default;

/**
* @brief Move constructor
*
* @param parameter Parameter object
*/
Parameter(Parameter&& parameter) noexcept {
std::swap(ptr, parameter.ptr);
}

/**
* @brief Copy constructor
*
* @param parameter Parameter object
*/
Parameter(const Parameter& parameter) {
*this = parameter;
}

/**
* @brief Constructor creates parameter with object
*
* @tparam T Parameter type
* @tparam U Identity type-transformation
* @param parameter object
*/
template <class T,
typename = typename std::enable_if<!std::is_same<typename std::decay<T>::type, Parameter>::value &&
!std::is_abstract<typename std::decay<T>::type>::value>::type>
Parameter(T&& parameter) {
static_assert(!std::is_same<typename std::decay<T>::type, Parameter>::value, "To prevent recursion");
ptr = new RealData<typename std::decay<T>::type>(std::forward<T>(parameter));
}

/**
* @brief Constructor creates string parameter from char *
*
* @param str char array
*/
Parameter(const char* str) : Parameter(std::string(str)) {}

/**
* @brief Destructor
*/
virtual ~Parameter();

/**
* Copy operator for Parameter
* @param parameter Parameter object
* @return Parameter
*/
Parameter& operator=(const Parameter& parameter) {
if (this == &parameter) {
return *this;
}
clear();
if (!parameter.empty())
ptr = parameter.ptr->copy();
return *this;
}

/**
* Remove a value from parameter
*/
void clear() {
delete ptr;
ptr = nullptr;
}

/**
* Checks that parameter contains a value
* @return false if parameter contains a value, else true
*/
bool empty() const noexcept {
return nullptr == ptr;
}
/**
* Checks the type of value
* @tparam T Type of value
* @return true if type of value is correct
*/
template <class T>
bool is() const {
return empty() ? false : ptr->is(typeid(T));
}

/**
* Dynamic cast to specified type
* @tparam T type
* @return casted object
*/
template <typename T>
T&& as() && {
return std::move(dyn_cast<T>(ptr));
}

/**
* Dynamic cast to specified type
* @tparam T type
* @return casted object
*/
template <class T>
T& as() & {
return dyn_cast<T>(ptr);
}
/**
* Dynamic cast to specified type
* @tparam T type
* @return casted object
*/
template <class T>
const T& as() const& {
return dyn_cast<T>(ptr);
}

/**
* Dynamic cast to specified type
* @tparam T type
* @return casted object
*/
template <class T>
operator T &&() && {
return std::move(dyn_cast<typename std::remove_cv<T>::type>(ptr));
}

/**
* Dynamic cast to specified type
* @tparam T type
* @return casted object
*/
template <class T>
operator T&() & {
return dyn_cast<typename std::remove_cv<T>::type>(ptr);
}

/**
* Dynamic cast to specified type
* @tparam T type
* @return casted object
*/
template <class T>
operator const T&() const& {
return dyn_cast<typename std::remove_cv<T>::type>(ptr);
}

/**
* Dynamic cast to specified type
* @tparam T type
* @return casted object
*/
template <class T>
operator T&() const& {
return dyn_cast<typename std::remove_cv<T>::type>(ptr);
}

/**
* @brief The comparison operator for the Parameter
*
* @param rhs object to compare
* @return true if objects are equal
*/
bool operator==(const Parameter& rhs) const {
return *ptr == *(rhs.ptr);
}
/**
* @brief The comparison operator for the Parameter
*
* @param rhs object to compare
* @return true if objects aren't equal
*/
bool operator!=(const Parameter& rhs) const {
return !(*this == rhs);
}

/**
* @brief Prints underlying object to the given output stream.
* Uses operator<< if it is defined, leaves stream unchanged otherwise.
* In case of empty parameter or nullptr stream immediately returns.
*
* @param object Object to be printed to the given output stream.
* @param stream Output stream object will be printed to.
*/
friend void PrintTo(const Parameter& object, std::ostream* stream) {
if (object.empty() || !stream) {
return;
}
object.ptr->print(*stream);
}

private:
template <class T, class EqualTo>
struct CheckOperatorEqual {
template <class U, class V>
static auto test(U*) -> decltype(std::declval<U>() == std::declval<V>()) {
return false;
}

template <typename, typename>
static auto test(...) -> std::false_type {
return {};
}

using type = typename std::is_same<bool, decltype(test<T, EqualTo>(nullptr))>::type;
};

template <class T, class EqualTo = T>
struct HasOperatorEqual : CheckOperatorEqual<T, EqualTo>::type {};

template <class T, class U>
struct CheckOutputStreamOperator {
template <class V, class W>
static auto test(W*) -> decltype(std::declval<V&>() << std::declval<W>(), std::true_type()) {
return {};
}

template <typename, typename>
static auto test(...) -> std::false_type {
return {};
}

using type = typename std::is_same<std::true_type, decltype(test<T, U>(nullptr))>::type;
};

template <class T>
struct HasOutputStreamOperator : CheckOutputStreamOperator<std::ostream, T>::type {};

struct Any {
#ifdef __ANDROID__
virtual ~Any();
#else
virtual ~Any() = default;
#endif
virtual bool is(const std::type_info&) const = 0;
virtual Any* copy() const = 0;
virtual bool operator==(const Any& rhs) const = 0;
virtual void print(std::ostream&) const = 0;
};

template <class T>
struct RealData : Any, std::tuple<T> {
using std::tuple<T>::tuple;

bool is(const std::type_info& id) const override {
return id == typeid(T);
}
Any* copy() const override {
return new RealData{get()};
}

T& get() & {
return std::get<0>(*static_cast<std::tuple<T>*>(this));
}

const T& get() const& {
return std::get<0>(*static_cast<const std::tuple<T>*>(this));
}

template <class U>
typename std::enable_if<!HasOperatorEqual<U>::value, bool>::type equal(const Any& left, const Any& rhs) const {
throw ov::Exception("Parameter doesn't contain equal operator");
}

template <class U>
typename std::enable_if<HasOperatorEqual<U>::value, bool>::type equal(const Any& left, const Any& rhs) const {
return dyn_cast<U>(&left) == dyn_cast<U>(&rhs);
}

bool operator==(const Any& rhs) const override {
return rhs.is(typeid(T)) && equal<T>(*this, rhs);
}

template <class U, typename std::enable_if<!HasOutputStreamOperator<U>::value, bool>::type = true>
void print(std::ostream& stream, const U& object) const {}

template <class U, typename std::enable_if<HasOutputStreamOperator<U>::value, bool>::type = true>
void print(std::ostream& stream, const U& object) const {
stream << object;
}

void print(std::ostream& stream) const override {
print<T>(stream, get());
}
};

template <typename T>
static T& dyn_cast(Any* obj) {
OPENVINO_ASSERT(obj != nullptr, "Parameter is empty!");
return dynamic_cast<RealData<T>&>(*obj).get();
}

template <typename T>
static const T& dyn_cast(const Any* obj) {
OPENVINO_ASSERT(obj != nullptr, "Parameter is empty!");
return dynamic_cast<const RealData<T>&>(*obj).get();
}

Any* ptr = nullptr;
};

/**
* @brief An std::map object containing parameters
*/
using ParamMap = std::map<std::string, Parameter>;

#ifdef __ANDROID__
extern template struct OPENVINO_RUNTIME_API ov::runtime::Parameter::RealData<int>;
extern template struct OPENVINO_RUNTIME_API ov::runtime::Parameter::RealData<bool>;
extern template struct OPENVINO_RUNTIME_API ov::runtime::Parameter::RealData<float>;
extern template struct OPENVINO_RUNTIME_API ov::runtime::Parameter::RealData<uint32_t>;
extern template struct OPENVINO_RUNTIME_API ov::runtime::Parameter::RealData<std::string>;
extern template struct OPENVINO_RUNTIME_API ov::runtime::Parameter::RealData<unsigned long>;
extern template struct OPENVINO_RUNTIME_API ov::runtime::Parameter::RealData<std::vector<int>>;
extern template struct OPENVINO_RUNTIME_API ov::runtime::Parameter::RealData<std::vector<std::string>>;
extern template struct OPENVINO_RUNTIME_API ov::runtime::Parameter::RealData<std::vector<unsigned long>>;
extern template struct OPENVINO_RUNTIME_API ov::runtime::Parameter::RealData<std::tuple<unsigned int, unsigned int>>;
extern template struct OPENVINO_RUNTIME_API
ov::runtime::Parameter::RealData<std::tuple<unsigned int, unsigned int, unsigned int>>;
#endif

} // namespace runtime

} // namespace ov
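The HasOutputStreamOperator trait in the removed header relies on expression SFINAE; a reduced, self-contained version of the same detection idiom (the trait name IsStreamable and the Opaque type are assumptions for illustration):

#include <ostream>
#include <type_traits>
#include <utility>

template <class T, class = void>
struct IsStreamable : std::false_type {};

template <class T>
struct IsStreamable<T, decltype(void(std::declval<std::ostream&>() << std::declval<T>()))>
    : std::true_type {};

struct Opaque {};  // no operator<< defined

static_assert(IsStreamable<int>::value, "int is printable");
static_assert(!IsStreamable<Opaque>::value, "Opaque is not printable");

int main() {}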
@ -113,11 +113,12 @@ std::shared_ptr<MemoryDesc> MemoryDescUtils::makeDummyDesc(const MemoryDesc &des
}

Shape MemoryDescUtils::makeDummyShape(const Shape &shape, Dim dummyVal) {
const auto& minDims = shape.getMinDims();
const auto& maxDims = shape.getMaxDims();
const auto& dims = shape.getDims();
VectorDims dummyDims(dims.size());
for (size_t i = 0; i < dims.size(); ++i) {
dummyDims[i] = dims[i] == Shape::UNDEFINED_DIM ? std::min(maxDims[i], dummyVal) : dims[i];
dummyDims[i] = dims[i] == Shape::UNDEFINED_DIM ? std::min(maxDims[i], std::max(minDims[i], dummyVal)) : dims[i];
}
return Shape(dummyDims);
}
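The change above clamps the dummy dimension into [min, max] rather than only capping it at max; a worked check with assumed bounds:

#include <algorithm>
#include <cassert>
#include <cstddef>

int main() {
    const size_t minDim = 16, maxDim = 512, dummyVal = 8;
    // old expression could fall below the lower bound
    assert(std::min(maxDim, dummyVal) == 8);
    // new expression clamps into [16, 512]
    assert(std::min(maxDim, std::max(minDim, dummyVal)) == 16);
}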
@ -22,11 +22,15 @@ impl_desc_type MKLDNNPlugin::parse_impl_name(std::string impl_desc_name) {

SEARCH_WORD(ref);
SEARCH_WORD(jit);
SEARCH_WORD(gemm);
SEARCH_WORD(brgconv);
SEARCH_WORD(brgemm);
if ((res & impl_desc_type::brgemm) != impl_desc_type::brgemm)
SEARCH_WORD(gemm);
SEARCH_WORD(blas);
SEARCH_WORD(sse42);
SEARCH_WORD_2(sse41, sse42);
SEARCH_WORD(avx2);
SEARCH_WORD(amx);
SEARCH_WORD(avx512);
SEARCH_WORD(any);
SEARCH_WORD(_1x1);
@ -82,6 +86,28 @@ const char* MKLDNNPlugin::impl_type_to_string(impl_desc_type type) {
CASE(jit_avx_dw);
CASE(jit_sse42_dw);
CASE(jit_uni_dw);
CASE(jit_avx512_amx);
CASE(jit_avx512_amx_1x1);
CASE(jit_avx512_amx_dw);
CASE(brgconv_avx512);
CASE(brgconv_avx2);
CASE(brgconv_avx);
CASE(brgconv_sse42);
CASE(brgconv_uni);
CASE(brgconv_avx512_amx);
CASE(brgconv_avx512_1x1);
CASE(brgconv_avx2_1x1);
CASE(brgconv_avx_1x1);
CASE(brgconv_sse42_1x1);
CASE(brgconv_uni_1x1);
CASE(brgconv_avx512_amx_1x1);
CASE(brgemm_avx512);
CASE(brgemm_avx2);
CASE(brgemm_avx);
CASE(brgemm_sse42);
CASE(brgemm_uni);
CASE(brgemm_avx512_amx);

#undef CASE
return "unknown";
}
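The guard added before the gemm search matters because "brgemm" contains "gemm" as a substring; a simplified standalone illustration (not the plugin's SEARCH_WORD macro):

#include <cassert>
#include <string>

int main() {
    const std::string name = "brgemm_avx512";
    bool brgemm = name.find("brgemm") != std::string::npos;
    // the plain gemm search runs only if brgemm did not already match
    bool gemm = !brgemm && name.find("gemm") != std::string::npos;
    assert(brgemm && !gemm);
}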
@ -12,25 +12,29 @@ enum impl_desc_type {
unknown = 0x00000000,
undef,
// Optimization approach
simple = 1<<6,
ref = 1<<7,
jit = 1<<8,
gemm = 1<<9,
simple = 1<<6,
ref = 1<<7,
jit = 1<<8,
gemm = 1<<9,
brgconv = 1<<10,
brgemm = 1<<11,
// CPU version
sse42 = 1<<10,
avx = 1<<11,
avx2 = 1<<12,
avx512 = 1<<13,
blas = 1<<14,
any = 1<<15,
uni = 1<<16,
sse42 = 1<<12,
avx = 1<<13,
avx2 = 1<<14,
avx512 = 1<<15,
amx = 1<<16,
blas = 1<<17,
any = 1<<18,
uni = 1<<19,
// Other specificator
_1x1 = 1<<17,
_dw = 1<<18,
_1x1 = 1<<20,
_dw = 1<<21,
// Other info
reorder = 1<<19,
reorder = 1<<22,
// winograd
winograd = 1<<20,
winograd = 1<<23,

// real types
ref_any = ref | any,

@ -49,18 +53,42 @@ enum impl_desc_type {
jit_avx = jit | avx,
jit_sse42 = jit | sse42,
jit_uni = jit | uni,
jit_avx512_amx = jit | avx512 | amx,

jit_avx512_1x1 = jit | avx512 | _1x1,
jit_avx2_1x1 = jit | avx2 | _1x1,
jit_avx_1x1 = jit | avx | _1x1,
jit_sse42_1x1 = jit | sse42 | _1x1,
jit_uni_1x1 = jit | uni | _1x1,
jit_avx512_amx_1x1 = jit | avx512 | amx | _1x1,

jit_avx512_dw = jit | avx512 | _dw,
jit_avx2_dw = jit | avx2 | _dw,
jit_avx_dw = jit | avx | _dw,
jit_sse42_dw = jit | sse42 | _dw,
jit_uni_dw = jit | uni | _dw,
jit_avx512_amx_dw = jit | avx512 | amx | _dw,

brgconv_avx512 = brgconv | avx512,
brgconv_avx2 = brgconv | avx2,
brgconv_avx = brgconv | avx,
brgconv_sse42 = brgconv | sse42,
brgconv_uni = brgconv | uni,
brgconv_avx512_amx = brgconv | avx512 | amx,

brgconv_avx512_1x1 = brgconv | avx512 | _1x1,
brgconv_avx2_1x1 = brgconv | avx2 | _1x1,
brgconv_avx_1x1 = brgconv | avx | _1x1,
brgconv_sse42_1x1 = brgconv | sse42 | _1x1,
brgconv_uni_1x1 = brgconv | uni | _1x1,
brgconv_avx512_amx_1x1 = brgconv | avx512 | amx | _1x1,

brgemm_avx512 = brgemm | avx512,
brgemm_avx2 = brgemm | avx2,
brgemm_avx = brgemm | avx,
brgemm_sse42 = brgemm | sse42,
brgemm_uni = brgemm | uni,
brgemm_avx512_amx = brgemm | avx512 | amx,
};

const char * impl_type_to_string(impl_desc_type type);
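A minimal standalone check of the renumbered flag scheme (the enum below mirrors only three of the values above): composed descriptors still decompose into their capability bits.

#include <cassert>

enum impl_flags : unsigned {
    jit    = 1u << 8,
    avx512 = 1u << 15,
    amx    = 1u << 16,
    jit_avx512_amx = jit | avx512 | amx,
};

int main() {
    assert((jit_avx512_amx & amx) == amx);       // AMX bit is set
    assert((jit_avx512_amx & (1u << 14)) == 0);  // avx2 bit is not
}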
@ -17,6 +17,7 @@
#include "nodes/mkldnn_mvn_node.h"
#include <nodes/mkldnn_transpose_node.h>
#include "nodes/mkldnn_interpolate_node.h"
#include "nodes/mkldnn_reduce_node.h"
#include "nodes/mkldnn_input_node.h"
#include "nodes/mkldnn_rnn.h"
#include "nodes/common/cpu_convert.h"
@ -137,6 +138,10 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
FuseNormalizeL2AndSimpleOperation(graph);
graph.RemoveDroppedNodes();

OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseReduceAndSimpleOperation");
FuseReduceAndSimpleOperation(graph);
graph.RemoveDroppedNodes();

OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseEltwiseAndSimple");
FuseEltwiseAndSimple(graph);
graph.RemoveDroppedNodes();
@ -1352,6 +1357,46 @@ void MKLDNNGraphOptimizer::FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph)
}
}

void MKLDNNGraphOptimizer::FuseReduceAndSimpleOperation(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();

auto isSuitableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == Reduce && node->getChildEdges().size() == 1;
};

auto parent = graphNodes.begin();
while (parent != graphNodes.end()) {
auto parentNode = *parent;
if (!isSuitableParentNode(parentNode)) {
parent++;
continue;
}

auto childNode = parentNode->getChildEdgeAt(0)->getChild();
if (!parentNode->canFuse(childNode)) {
parent++;
continue;
}

childNode->fuseInto(parentNode);

if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) {
auto parentEdges = childNode->parentEdges;
for (auto &parentEdge : parentEdges) {
auto p_edge = parentEdge.lock();
if (p_edge == nullptr)
IE_THROW() << "Cannot get parent edge " << childNode->getName();
if (p_edge->getParent()->getType() == Reduce)
continue;

graph.RemoveEdge(p_edge);
}
}

graph.DropNode(childNode);
}
}

void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();

@ -1918,7 +1963,7 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) {
void MKLDNNGraphOptimizer::reshapeRnnSeq(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();

auto isSutableParentNode = [](MKLDNNNodePtr node) {
auto isSuitableParentNode = [](MKLDNNNodePtr node) {
if (node->type != RNNSeq)
return false;
auto rnnNode = std::dynamic_pointer_cast<MKLDNNRNN>(node);
@ -1927,7 +1972,7 @@ void MKLDNNGraphOptimizer::reshapeRnnSeq(MKLDNNGraph &graph) {

for (size_t i = 0; i < graphNodes.size(); i++) {
auto parentNode = graphNodes[i];
if (!isSutableParentNode(parentNode)) {
if (!isSuitableParentNode(parentNode)) {
continue;
}
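A toy sketch of the scan-and-fuse shape used by FuseReduceAndSimpleOperation above (the Node struct and the graph are illustrative, not the plugin API): a Reduce with exactly one consumer absorbs that consumer, which is then dropped.

#include <cassert>
#include <memory>
#include <string>
#include <vector>

struct Node {
    std::string type;
    std::vector<std::shared_ptr<Node>> children;
    std::vector<std::string> fusedWith;
};

int main() {
    auto reduce = std::make_shared<Node>(Node{"Reduce", {}, {}});
    auto eltwise = std::make_shared<Node>(Node{"Eltwise", {}, {}});
    reduce->children.push_back(eltwise);

    std::vector<std::shared_ptr<Node>> graph = {reduce, eltwise};
    for (auto& node : graph) {
        if (node->type != "Reduce" || node->children.size() != 1)
            continue;                            // suitability check
        auto child = node->children.front();
        node->fusedWith.push_back(child->type);  // fuseInto
        node->children.clear();                  // DropNode, simplified
    }
    assert(reduce->fusedWith.size() == 1 && reduce->fusedWith.front() == "Eltwise");
}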
@ -32,6 +32,7 @@ private:
void FuseMVNAndSimpleOperation(MKLDNNGraph &graph);
void FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph);
void FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph);
void FuseReduceAndSimpleOperation(MKLDNNGraph &graph);

void DropDoubleReorders(MKLDNNGraph& graph);
void FuseConvolutionAndZeroPoints(MKLDNNGraph &graph);
@ -391,9 +391,12 @@ std::string MKLDNNNode::getPrimitiveDescriptorType() {
SEARCH_TYPE(reorder);
SEARCH_TYPE(jit);
SEARCH_TYPE(gemm);
SEARCH_TYPE(brgconv);
SEARCH_TYPE(brgemm);
SEARCH_TYPE(ref);

SEARCH_TYPE(avx512);
SEARCH_TYPE(amx);
SEARCH_TYPE(avx2);
SEARCH_TYPE(avx);
SEARCH_TYPE(sse42);
@ -521,10 +524,17 @@ void MKLDNNNode::redefineOutputMemory(const std::vector<VectorDims> &newOutputSh
}
for (size_t i = 0; i < outputShapes.size(); i++) {
const auto edges = getChildEdgesAtPort(i);
const auto memDesc = getBaseMemDescAtOutputPort(i)->cloneWithNewDims(newOutputShapes[i]);

// avoid 0D shape incompatibility
auto newOutputShape = newOutputShapes[i];
if (newOutputShape.empty()) {
newOutputShape.push_back(1);
}

const auto memDesc = getBaseMemDescAtOutputPort(i)->cloneWithNewDims(newOutputShape);

const auto &currDesc = edges[0]->getMemory().getDesc();
if (currDesc.getShape().isStatic() && currDesc.getShape().getStaticDims() == newOutputShapes[i])
if (currDesc.getShape().isStatic() && currDesc.getShape().getStaticDims() == newOutputShape)
continue;

// this path is necessary if there are several edges per one port
@ -838,6 +848,14 @@ void MKLDNNNode::cleanup() {
const std::vector<impl_desc_type>& MKLDNNNode::getPrimitivesPriority() {
std::vector<impl_desc_type> priorities = {
impl_desc_type::unknown,
impl_desc_type::brgconv_avx512_amx_1x1,
impl_desc_type::brgconv_avx512_amx,
impl_desc_type::jit_avx512_amx_dw,
impl_desc_type::jit_avx512_amx_1x1,
impl_desc_type::jit_avx512_amx,
// Brgconv kernels disabled in order to prevent perf degradations on non-AMX HW
// impl_desc_type::brgconv_avx512_1x1,
// impl_desc_type::brgconv_avx512,
impl_desc_type::jit_uni_dw,
impl_desc_type::jit_uni_1x1,
impl_desc_type::jit_uni,
@ -1285,6 +1303,19 @@ bool MKLDNNNode::inputShapesDefined() const {
return true;
}

bool MKLDNNNode::outputShapesDefined() const {
for (size_t i = 0; i < outputShapes.size(); i++) {
if (!getChildEdgesAtPort(i)[0]->getMemory().getDesc().isDefined()) {
return false;
}
}
return true;
}

bool MKLDNNNode::shapesDefined() const {
return inputShapesDefined() && outputShapesDefined();
}

bool MKLDNNNode::needPrepareParams() const {
return inputShapesModified();
}
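The 0D guard in redefineOutputMemory above promotes a scalar output shape to {1} before the descriptor is cloned; a minimal standalone check:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
    std::vector<size_t> newOutputShape = {};  // scalar (0D) result
    if (newOutputShape.empty())
        newOutputShape.push_back(1);          // treat it as a 1-element tensor
    assert((newOutputShape == std::vector<size_t>{1}));
}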
@ -707,6 +707,8 @@ protected:
bool isDynamic = false;

bool inputShapesDefined() const;
bool outputShapesDefined() const;
bool shapesDefined() const;
void updateLastInputDims();

bool inputShapesModified() const;
@ -35,6 +35,7 @@
#include <transformations/common_optimizations/nop_elimination.hpp>
#include <transformations/common_optimizations/wrap_interpolate_into_transposes.hpp>
#include <transformations/common_optimizations/transpose_sinking.hpp>
#include <transformations/op_conversions/convert_broadcast_to_tiles.hpp>
#include <transformations/op_conversions/convert_depth_to_space.hpp>
#include <transformations/op_conversions/convert_shuffle_channels3.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp>
@ -70,6 +71,7 @@
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
#include <transformations/smart_reshape/matmul_sr.hpp>
#include <transformations/op_conversions/convert_minimum_to_power_and_max.hpp>
#include <transformations/op_conversions/convert_reduce_to_pooling.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
@ -313,7 +315,35 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
for (size_t i = 0; i < node->get_output_size(); i++) {
const auto outputs = node->get_output_target_inputs(i);
for (const auto &out : outputs) {
if (out.get_node()->get_type_info() != ngraph::op::v0::Result::get_type_info_static()) {
if (!ngraph::op::is_output(out.get_node())) {
return false;
}
}
}
return true;
});

// TODO [DS NMS]: remove when nodes from models where NMS is not the last node support DS
pass_config->set_callback<ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE>(
[](const_node_ptr &node) -> bool {
for (size_t i = 0; i < node->get_output_size(); i++) {
const auto outputs = node->get_output_target_inputs(i);
for (const auto &out : outputs) {
if (!ngraph::op::is_output(out.get_node())) {
return false;
}
}
}
return true;
});

// TODO [DS NMS]: remove when nodes from models where NMS is not the last node support DS
pass_config->set_callback<ngraph::pass::ConvertMatrixNmsToMatrixNmsIE>(
[](const_node_ptr &node) -> bool {
for (size_t i = 0; i < node->get_output_size(); i++) {
const auto outputs = node->get_output_target_inputs(i);
for (const auto &out : outputs) {
if (!ngraph::op::is_output(out.get_node())) {
return false;
}
}
@ -337,6 +367,10 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
pass_config->disable<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
pass_config->disable<ngraph::pass::ConvertGather7ToGather1>();
pass_config->disable<ngraph::pass::ConvertMinimum>();
pass_config->disable<ngraph::pass::ConvertBroadcastToTiles>();
pass_config->disable<ngraph::pass::ConvertReduceMeanToPooling>();
pass_config->disable<ngraph::pass::ConvertReduceMaxToPooling>();
pass_config->disable<ngraph::pass::ConvertReduceSumToPooling>();

pass_config->enable<ngraph::pass::NormalizeL2Decomposition>();
pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>();
@ -32,7 +32,6 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphF
manager.register_pass<Reshape1DMaxPool>();
manager.register_pass<ConvertMatMulToFC>();
manager.register_pass<AlignMatMulInputRanks>();
manager.register_pass<ConvertBroadcastToTiles>();
manager.register_pass<ConvertTileToSeqTiles>();
manager.register_pass<FullyConnectedBiasFusion>();
manager.register_pass<ReshapeFullyConnected>();
@ -0,0 +1,243 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "tile_broadcast_utils.h"

#include "cpu_memcpy.h"
#include "ie_parallel.hpp"
#include <memory_desc/cpu_memory_desc_utils.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"

using namespace InferenceEngine;
using namespace MKLDNNPlugin;

VectorDims TileBroadcastCommon::calculateDenseStrides(const VectorDims &dims) {
VectorDims strides(dims.size(), 1);

for (int i = strides.size() - 2; i >= 0; i--) {
strides[i] = strides[i + 1] * dims[i + 1];
}

return strides;
}

void TileBroadcastCommon::fillOptimizedDimsAndSrcStrides(const VectorDims& srcBlockedDims, const VectorDims& blockedRepeats,
VectorDims& optimizedDims, VectorDims& optimizedSrcStrides) {
optimizedDims.clear();
optimizedSrcStrides.clear();
VectorDims srcBlockedStrides = calculateDenseStrides(srcBlockedDims);

for (int i = 0; i < srcBlockedDims.size(); i++) {
optimizedDims.push_back(blockedRepeats[i]);
optimizedDims.push_back(srcBlockedDims[i]);
optimizedSrcStrides.push_back(0);
optimizedSrcStrides.push_back(srcBlockedStrides[i]);
}

int i = 1;
while (i < optimizedDims.size() - 1) {
if (optimizedDims[i] == 1) {
optimizedDims[i + 1] *= optimizedDims[i - 1];
optimizedDims.erase(optimizedDims.begin() + i - 1, optimizedDims.begin() + i + 1);
optimizedSrcStrides.erase(optimizedSrcStrides.begin() + i - 1, optimizedSrcStrides.begin() + i + 1);
} else {
i++;
}
}

if (optimizedDims[0] == 1 && optimizedDims.size() > 1) {
optimizedDims.erase(optimizedDims.begin());
optimizedSrcStrides.erase(optimizedSrcStrides.begin());
}

if (optimizedDims[optimizedDims.size() - 1] == 1 && optimizedDims.size() > 1) {
optimizedDims.erase(optimizedDims.end() - 1);
optimizedSrcStrides.erase(optimizedSrcStrides.end() - 1);
}
}

bool TileBroadcastCommon::canBeExecutedInBlockedLayout(VectorDims srcBlockedDims, VectorDims blockedRepeats,
const size_t elemsInBlock) {
if (srcBlockedDims.empty() || blockedRepeats.empty() || elemsInBlock == 0lu || srcBlockedDims[1] == Shape::UNDEFINED_DIM ||
(blockedRepeats[1] != 1 && srcBlockedDims[1] % elemsInBlock != 0))
return false;

srcBlockedDims[1] = div_up(srcBlockedDims[1], elemsInBlock);
srcBlockedDims.push_back(elemsInBlock);
blockedRepeats.push_back(1);

VectorDims optimizedDims, optimizedSrcStrides;
fillOptimizedDimsAndSrcStrides(srcBlockedDims, blockedRepeats, optimizedDims, optimizedSrcStrides);

constexpr size_t maxNDims = 6lu;
return optimizedDims.size() <= maxNDims;
}

bool TileBroadcastCommon::canBeExecutedInNSPCLayout(VectorDims srcBlockedDims, VectorDims blockedRepeats) {
srcBlockedDims.push_back(srcBlockedDims[1]);
srcBlockedDims.erase(srcBlockedDims.begin() + 1);
blockedRepeats.push_back(blockedRepeats[1]);
blockedRepeats.erase(blockedRepeats.begin() + 1);

VectorDims optimizedDims, optimizedSrcStrides;
fillOptimizedDimsAndSrcStrides(srcBlockedDims, blockedRepeats, optimizedDims, optimizedSrcStrides);

constexpr size_t maxNDims = 6lu;
return optimizedDims.size() <= maxNDims;
}

std::vector<NodeDesc> TileBroadcastCommon::getSupportedConfigs(const MKLDNNNode *node) {
std::vector<NodeDesc> supportedPrimitiveDescriptors;
auto precision = node->getOriginalInputPrecisionAtPort(0);
auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);

const auto& srcDims = node->getInputShapeAtPort(0).getDims();
const auto& inDataShape = node->getInputShapeAtPort(0);
size_t outDataShapeRank = node->getOutputShapeAtPort(0).getRank();

NodeConfig config;
if (repeats.size() != outDataShapeRank && !repeats.empty())
IE_THROW() << node->getTypeStr() << " node with name " << node->getName() << " has incorrect Repeats vector."
"Repeats rank must be equal to output shape rank. Repeats rank: " << repeats.size() << ", output shape rank: " << outDataShapeRank;

config.dynBatchSupport = false;
config.inConfs.resize(node->getParentEdges().size());
config.inConfs[0].inPlace = -1;
config.inConfs[0].constant = constMap[0];
config.inConfs[1].inPlace = -1;
config.inConfs[1].constant = constMap[1];
config.inConfs[1].desc = std::make_shared<CpuBlockedMemoryDesc>(Precision::I32, node->getInputShapeAtPort(1));
if (config.inConfs.size() == 3) {
config.inConfs[2].inPlace = -1;
config.inConfs[2].constant = constMap[2];
config.inConfs[2].desc = std::make_shared<CpuBlockedMemoryDesc>(Precision::I32, node->getInputShapeAtPort(2));
}

config.outConfs.resize(node->getChildEdges().size());

auto pushDesc = [&](mkldnn::memory::format_tag inFormat, mkldnn::memory::format_tag outFormat) {
config.inConfs[0].desc = std::make_shared<DnnlBlockedMemoryDesc>(node->getInputShapeAtPort(0), dataType, inFormat);
for (int i = 0; i < config.outConfs.size(); i++) {
config.outConfs[i].inPlace = -1;
config.outConfs[i].constant = false;
config.outConfs[i].desc = std::make_shared<DnnlBlockedMemoryDesc>(node->getOutputShapeAtPort(0), dataType, outFormat);
}
supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref});
};

if (!repeats.empty() && inDataShape.getRank() == outDataShapeRank && (outDataShapeRank == 4 || outDataShapeRank == 5)) {
if (canBeExecutedInBlockedLayout(srcDims, repeats, 16)) {
if (outDataShapeRank == 4) {
pushDesc(mkldnn::memory::format_tag::nChw16c, mkldnn::memory::format_tag::nChw16c);
} else {
pushDesc(mkldnn::memory::format_tag::nCdhw16c, mkldnn::memory::format_tag::nCdhw16c);
}
}
if (canBeExecutedInBlockedLayout(srcDims, repeats, 8)) {
if (outDataShapeRank == 4) {
pushDesc(mkldnn::memory::format_tag::nChw8c, mkldnn::memory::format_tag::nChw8c);
} else {
pushDesc(mkldnn::memory::format_tag::nCdhw8c, mkldnn::memory::format_tag::nCdhw8c);
}
}
if (canBeExecutedInNSPCLayout(srcDims, repeats)) {
if (outDataShapeRank == 4) {
pushDesc(mkldnn::memory::format_tag::nhwc, mkldnn::memory::format_tag::nhwc);
} else {
pushDesc(mkldnn::memory::format_tag::ndhwc, mkldnn::memory::format_tag::ndhwc);
}
}
}

auto inFmt = MKLDNNExtensionUtils::GetPlainFormatByRank(inDataShape.getRank());
auto outFmt = MKLDNNExtensionUtils::GetPlainFormatByRank(outDataShapeRank);
if (inFmt == mkldnn::memory::format_tag::undef || outFmt == mkldnn::memory::format_tag::undef) {
config.inConfs[0].desc = std::make_shared<CpuBlockedMemoryDesc>(precision, node->getInputShapeAtPort(0));
for (int i = 0; i < config.outConfs.size(); i++) {
config.outConfs[i].inPlace = -1;
config.outConfs[i].constant = false;
config.outConfs[i].desc = std::make_shared<CpuBlockedMemoryDesc>(precision, node->getOutputShapeAtPort(i));
}
supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref});
} else {
pushDesc(inFmt, outFmt);
}

return supportedPrimitiveDescriptors;
}

bool TileBroadcastCommon::prepareOptimizedParams(const MKLDNNNode *node, VectorDims& srcBlockedDims, VectorDims& dstBlockedDims) {
while (srcBlockedDims.size() < dstBlockedDims.size()) {
srcBlockedDims.insert(srcBlockedDims.begin(), 1);
}

VectorDims blockedRepeats = repeats;
// for nC(d)hw16c and nC(d)hw8c layouts
while (blockedRepeats.size() < dstBlockedDims.size()) {
blockedRepeats.push_back(1);
}
// for NSPC layouts
if (node->getBaseMemDescAtInputPort(0)->hasLayoutType(LayoutType::nspc) && one_of(node->getBaseMemDescAtInputPort(0)->getShape().getRank(), 4, 5)) {
blockedRepeats.push_back(blockedRepeats[1]);
blockedRepeats.erase(blockedRepeats.begin() + 1);
}

VectorDims optimizedDims, optimizedSrcStrides;
fillOptimizedDimsAndSrcStrides(srcBlockedDims, blockedRepeats, optimizedDims, optimizedSrcStrides);

constexpr size_t maxNDims = 6lu;
if (optimizedDims.size() > maxNDims)
return false;

while (optimizedDims.size() < maxNDims) {
optimizedDims.insert(optimizedDims.begin(), 1);
optimizedSrcStrides.insert(optimizedSrcStrides.begin(), 1);
}

VectorDims optimizedDstStrides = calculateDenseStrides(optimizedDims);

size_t dataSize = node->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size();
for (int i = 0; i < optimizedDims.size(); i++) {
optimizedSrcStrides[i] *= dataSize;
optimizedDstStrides[i] *= dataSize;
}

optimizedParams.dims = optimizedDims;
optimizedParams.srcStrides = optimizedSrcStrides;
optimizedParams.dstStrides = optimizedDstStrides;
optimizedParams.copySize = optimizedDims[5] * dataSize;

return true;
}

void TileBroadcastCommon::optimizedExecute(const MKLDNNMemoryPtr& srcMemory, const MKLDNNMemoryPtr& dstMemory) {
auto srcData = reinterpret_cast<const char *>(srcMemory->GetPtr());
auto dstData = reinterpret_cast<char *>(dstMemory->GetPtr());

if (optimizedParams.srcStrides[5] == 0) {
parallel_for5d(optimizedParams.dims[0], optimizedParams.dims[1], optimizedParams.dims[2], optimizedParams.dims[3], optimizedParams.dims[4],
[&](int i0, int i1, int i2, int i3, int i4) {
auto srcData2 = srcData + (i0 * optimizedParams.srcStrides[0] + i1 * optimizedParams.srcStrides[1] +
i2 * optimizedParams.srcStrides[2] + i3 * optimizedParams.srcStrides[3] +
i4 * optimizedParams.srcStrides[4]);
auto dstData2 = dstData + (i0 * optimizedParams.dstStrides[0] + i1 * optimizedParams.dstStrides[1] +
i2 * optimizedParams.dstStrides[2] + i3 * optimizedParams.dstStrides[3] +
i4 * optimizedParams.dstStrides[4]);
for (int i = 0; i < optimizedParams.dims[5]; i++) {
cpu_memcpy(dstData2 + i * optimizedParams.dstStrides[5], srcData2, optimizedParams.dstStrides[5]);
}
});
} else {
parallel_for5d(optimizedParams.dims[0], optimizedParams.dims[1], optimizedParams.dims[2], optimizedParams.dims[3], optimizedParams.dims[4],
[&](int i0, int i1, int i2, int i3, int i4) {
auto srcData2 = srcData + (i0 * optimizedParams.srcStrides[0] + i1 * optimizedParams.srcStrides[1] +
i2 * optimizedParams.srcStrides[2] + i3 * optimizedParams.srcStrides[3] +
i4 * optimizedParams.srcStrides[4]);
auto dstData2 = dstData + (i0 * optimizedParams.dstStrides[0] + i1 * optimizedParams.dstStrides[1] +
i2 * optimizedParams.dstStrides[2] + i3 * optimizedParams.dstStrides[3] +
i4 * optimizedParams.dstStrides[4]);
cpu_memcpy(dstData2, srcData2, optimizedParams.copySize);
});
}
}
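A worked sketch of the stride math in this file (values are assumed): calculateDenseStrides maps dims {2, 3, 4} to {12, 4, 1}, and fillOptimizedDimsAndSrcStrides pairs each repeat with a zero source stride so repeated copies re-read the same region.

#include <cassert>
#include <cstddef>
#include <vector>

using Dims = std::vector<size_t>;

static Dims denseStrides(const Dims& dims) {
    Dims strides(dims.size(), 1);
    for (int i = static_cast<int>(strides.size()) - 2; i >= 0; i--)
        strides[i] = strides[i + 1] * dims[i + 1];  // row-major running product
    return strides;
}

int main() {
    assert((denseStrides({2, 3, 4}) == Dims{12, 4, 1}));
    // Interleaving repeats {2, 1, 1} with dims {2, 3, 4} yields the pairs
    // (repeat 2, stride 0)(dim 2, stride 12)(repeat 1, stride 0)(dim 3, stride 4)...
    // before the collapse step removes the unit entries.
}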
@ -0,0 +1,43 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "mkldnn_node.h"

#include <memory>
#include <vector>


namespace MKLDNNPlugin {

class TileBroadcastCommon {
protected:
static VectorDims calculateDenseStrides(const VectorDims &dims);
std::vector<NodeDesc> getSupportedConfigs(const MKLDNNNode *node);
bool prepareOptimizedParams(const MKLDNNNode *node, VectorDims& srcBlockedDims, VectorDims& dstBlockedDims);

void optimizedExecute(const MKLDNNMemoryPtr& srcMemory, const MKLDNNMemoryPtr& dstMemory);

VectorDims repeats;
bool optimizedCase = false;
bool constMap[3] = { false };
mutable bool needPrepareParamsVar = false;

private:
static void fillOptimizedDimsAndSrcStrides(const VectorDims &srcBlockedDims, const VectorDims &blockedRepeats,
VectorDims &optimizedDims, VectorDims &optimizedSrcStrides);

static bool canBeExecutedInBlockedLayout(VectorDims srcDims, VectorDims repeats, const size_t elemsInBlock);
static bool canBeExecutedInNSPCLayout(VectorDims srcDims, VectorDims repeats);

struct {
VectorDims dims;
VectorDims srcStrides;
VectorDims dstStrides;
size_t copySize;
} optimizedParams;
};

} // namespace MKLDNNPlugin
@ -23,10 +23,6 @@ using namespace mkldnn::impl::cpu::x64;

bool MKLDNNAdaptivePoolingNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveAvgPool::get_type_info_static())) {
auto adaPool = std::dynamic_pointer_cast<const ngraph::opset8::AdaptiveAvgPool>(op);
if (!adaPool) {
@ -63,6 +59,7 @@ MKLDNNAdaptivePoolingNode::MKLDNNAdaptivePoolingNode(const std::shared_ptr<ngrap
algorithm = Algorithm::AdaptivePoolingMax;
}
spatialDimsCount = getInputShapeAtPort(0).getRank() - 2;
spatialDimsValue.resize(spatialDimsCount);
}

void MKLDNNAdaptivePoolingNode::getSupportedDescriptors() {
@ -74,12 +71,9 @@ void MKLDNNAdaptivePoolingNode::getSupportedDescriptors() {
if (getChildEdges().size() != (algorithm == AdaptivePoolingMax ? 2 : 1))
IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getParentEdges().size();

auto parentDims = getInputShapeAtPort(0).getStaticDims();
auto childDims = getOutputShapeAtPort(0).getStaticDims();

spatialDimsCount = parentDims.size() - 2;
auto srcRank = getInputShapeAtPort(0).getRank();
if (!one_of(spatialDimsCount, 1, 2, 3)) {
IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getInputShapeAtPort(0).getRank();
IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << srcRank;
}

if (getInputShapeAtPort(1).getRank() != 1) {
@ -91,6 +85,35 @@ void MKLDNNAdaptivePoolingNode::getSupportedDescriptors() {
}
}

bool MKLDNNAdaptivePoolingNode::needShapeInfer() const {
const auto newSpatialDimsPtr = reinterpret_cast<int32_t *>(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPtr());
for (size_t i = 0; i < spatialDimsCount; i++) {
if (spatialDimsValue[i] != newSpatialDimsPtr[i])
return true;
}
return MKLDNNNode::needShapeInfer();
}

std::vector<VectorDims> MKLDNNAdaptivePoolingNode::shapeInfer() const {
const auto inputDims = getParentEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims();
const auto spatialDims = getParentEdgesAtPort(1)[0]->getMemory().GetShape().getStaticDims();
const auto inputRank = inputDims.size();
const auto spatialDimsSize = spatialDims[0];

VectorDims outputDims(inputRank);
outputDims[0] = inputDims[0];
outputDims[1] = inputDims[1];
auto newSpatialDimsPtr = reinterpret_cast<int32_t *>(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPtr());
for (size_t i = 0; i < spatialDimsSize; i++) {
outputDims[i + 2] = newSpatialDimsPtr[i];
spatialDimsValue[i] = newSpatialDimsPtr[i];
}

std::vector<VectorDims> result = {};
result.resize(outputShapes.size(), outputDims);
return result;
}

void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
@ -104,7 +127,8 @@ void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() {
config.outConfs.resize((algorithm == Algorithm::AdaptivePoolingAvg ? 1 : 2));

std::vector<LayoutType> dataFormats{ LayoutType::ncsp };
if (getInputShapeAtPort(0).getStaticDims()[1] != 1) {
const auto &inDims = getInputShapeAtPort(0).getDims();
if (inDims[1] != Shape::UNDEFINED_DIM && inDims[1] != 1) {
dataFormats.push_back(LayoutType::nspc);
dataFormats.push_back(LayoutType::nCsp16c);
dataFormats.push_back(LayoutType::nCsp8c);
@ -26,10 +26,17 @@ public:

private:
int spatialDimsCount;
mutable std::vector<Dim> spatialDimsValue = {};
InferenceEngine::Precision precision = InferenceEngine::Precision::FP32;
inline void setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength);

std::string errorPrefix;

protected:
bool needShapeInfer() const override;
std::vector<VectorDims> shapeInfer() const override;
bool needPrepareParams() const override { return false; };
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); };
};

} // namespace MKLDNNPlugin
|
||||
|
@ -17,23 +17,27 @@
|
||||
using namespace MKLDNNPlugin;
|
||||
using namespace InferenceEngine;
|
||||
|
||||
bool MKLDNNBroadcastNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
|
||||
bool MKLDNNBroadcastNode::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
|
||||
try {
|
||||
if (isDynamicNgraphNode(op)) {
|
||||
errorMessage = "Doesn't support op with dynamic shapes";
|
||||
if (!ov::is_type<ov::op::v1::Broadcast>(op)) {
|
||||
errorMessage = "Only Broadcast operations from opset1 are supported.";
|
||||
return false;
|
||||
}
|
||||
const auto broadcast = std::dynamic_pointer_cast<const ngraph::opset1::Broadcast>(op);
|
||||
if (!broadcast) {
|
||||
errorMessage = "Only opset1 Broadcast operation is supported";
|
||||
if (!one_of(ov::as_type_ptr<const ov::op::v1::Broadcast>(op)->get_broadcast_spec().m_type,
|
||||
ov::op::AutoBroadcastType::NUMPY, ov::op::AutoBroadcastType::EXPLICIT)) {
|
||||
errorMessage = "Only NUMPY and EXPLICIT broadcast types are supported.";
|
||||
return false;
|
||||
}
|
||||
if (broadcast->get_broadcast_spec() != ngraph::op::AutoBroadcastType::NUMPY) {
|
||||
errorMessage = "Only NUMPY broadcast type is supported";
|
||||
if (op->get_input_partial_shape(TARGET_SHAPE_IDX).is_dynamic() ||
|
||||
(op->get_input_size() > AXES_MAPPING_IDX && op->get_input_partial_shape(AXES_MAPPING_IDX).is_dynamic())) {
|
||||
errorMessage = "Only static shapes are supported for target shape and axes mapping inputs.";
|
||||
return false;
|
||||
}
|
||||
if (std::dynamic_pointer_cast<const ngraph::opset1::Constant>(broadcast->get_input_node_shared_ptr(BROADCAST_SHAPE)) == nullptr) {
|
||||
errorMessage = "Only const 'shape' input is supported";
|
||||
if (!isDynamicNgraphNode(op) &&
|
||||
(!ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(TARGET_SHAPE_IDX)) ||
|
||||
(op->get_input_size() > AXES_MAPPING_IDX &&
|
||||
!ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXES_MAPPING_IDX))))) {
|
||||
errorMessage = "Only constant target shapes and axis mapping inputs are supported for static shapes.";
|
||||
return false;
|
||||
}
|
||||
} catch (...) {
|
||||
@ -42,91 +46,231 @@ bool MKLDNNBroadcastNode::isSupportedOperation(const std::shared_ptr<const ngrap
|
||||
return true;
|
||||
}
|
||||
|
||||
MKLDNNBroadcastNode::MKLDNNBroadcastNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
|
||||
MKLDNNBroadcastNode::MKLDNNBroadcastNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng,
|
||||
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
|
||||
std::string errorMessage;
|
||||
if (!isSupportedOperation(op, errorMessage)) {
|
||||
IE_THROW(NotImplemented) << errorMessage;
|
||||
}
|
||||
|
||||
errorPrefix = "Broadcast node with name '" + op->get_friendly_name() + "'";
|
||||
if (op->get_input_size() != 2 || op->get_output_size() != 1)
|
||||
IE_THROW() << errorPrefix << " has incorrect number of input/output edges! " << op->get_input_size() << "->" << op->get_output_size();
|
||||
errorPrefix = "Broadcast node with name '" + op->get_friendly_name() + "' ";
|
||||
if (op->get_input_size() != 2 && op->get_input_size() != 3)
|
||||
IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getParentEdges().size();
|
||||
if (op->get_output_size() == 0)
|
||||
IE_THROW() << errorPrefix << "has no output edges.";
|
||||
|
||||
SizeVector shape_dims = op->get_input_shape(BROADCAST_SHAPE);
|
||||
if (shape_dims.size() > 1)
|
||||
IE_THROW() << errorPrefix << " has incorrect 'shape' input rank: " << shape_dims.size();
|
||||
auto broadcastOp = ov::as_type_ptr<const ov::op::v1::Broadcast>(op);
|
||||
if (broadcastOp->get_broadcast_spec().m_type == ov::op::AutoBroadcastType::NUMPY) {
|
||||
broadcastType = NUMPY;
|
||||
} else if (broadcastOp->get_broadcast_spec().m_type == ov::op::AutoBroadcastType::EXPLICIT) {
|
||||
if (op->get_input_size() <= AXES_MAPPING_IDX)
|
||||
IE_THROW() << errorPrefix << " and EXPLICIT mode must have tree input edges: " << getParentEdges().size();
|
||||
broadcastType = EXPLICIT;
|
||||
}
|
||||
|
||||
if (ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(TARGET_SHAPE_IDX))) {
|
||||
constMap[TARGET_SHAPE_IDX] = true;
|
||||
targetShape = (ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(TARGET_SHAPE_IDX)))->get_vector<int32_t>();
|
||||
}
|
||||
if (broadcastType == EXPLICIT &&
|
||||
ov::is_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXES_MAPPING_IDX))) {
|
||||
constMap[AXES_MAPPING_IDX] = true;
|
||||
axesMapping = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(AXES_MAPPING_IDX))->get_vector<int32_t>();
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNBroadcastNode::getSupportedDescriptors() {
|
||||
if (!isDynamicNode()) {
|
||||
const auto& srcDims = getInputShapeAtPort(INPUT_DATA_IDX).getDims();
|
||||
repeats.assign(targetShape.begin(), targetShape.end());
|
||||
const auto ndims = repeats.size();
|
||||
|
||||
if (broadcastType == NUMPY) {
|
||||
for (size_t i = 0lu; i < srcDims.size(); i++) {
|
||||
repeats[ndims - 1lu - i] /= srcDims[srcDims.size() - 1lu - i];
|
||||
}
|
||||
} else if (broadcastType == EXPLICIT) {
|
||||
for (size_t i = 0lu; i < axesMapping.size(); i++) {
|
||||
repeats[axesMapping[i]] /= srcDims[i];
|
||||
}
|
||||
}
|
||||
needPrepareParamsVar = true;
|
||||
}
|
||||
}
|
||||
|
||||
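getSupportedDescriptors above precomputes the tile counts ("repeats") when shapes are static: for NUMPY mode the source shape is right-aligned against the target shape and divided element-wise; for EXPLICIT mode the division happens along the axes named by axes_mapping. A self-contained sketch of the NUMPY case:

    #include <vector>

    // repeats[i] = targetShape[i] / (right-aligned source dim), assuming the
    // source broadcasts to the target (each aligned dim divides evenly).
    std::vector<size_t> numpyRepeats(const std::vector<size_t>& srcDims,
                                     const std::vector<size_t>& targetShape) {
        std::vector<size_t> repeats(targetShape.begin(), targetShape.end());
        const size_t ndims = repeats.size();
        for (size_t i = 0; i < srcDims.size(); i++)
            repeats[ndims - 1 - i] /= srcDims[srcDims.size() - 1 - i];
        return repeats;
    }
    // e.g. srcDims = {1, 3}, targetShape = {2, 4, 3}  ->  repeats = {2, 4, 1}
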
void MKLDNNBroadcastNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    Precision prec = getOriginalInputPrecisionAtPort(BROADCAST_INPUT);
    supportedPrimitiveDescriptors = getSupportedConfigs(this);
}

    addSupportedPrimDesc({{LayoutType::ncsp, prec},
                          {LayoutType::ncsp, Precision::I32}},
                         {{LayoutType::ncsp, prec}},
                         impl_desc_type::ref_any);
void MKLDNNBroadcastNode::createPrimitive() {
    if (inputShapesDefined()) {
        if (needPrepareParams())
            prepareParams();
        updateLastInputDims();
    }
}

bool MKLDNNBroadcastNode::needPrepareParams() const {
    return needPrepareParamsVar;
}

void MKLDNNBroadcastNode::prepareParams() {
    if (!constMap[TARGET_SHAPE_IDX]) {
        const auto& targetShapeMem = getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory();
        const int32_t* targetShapeData = reinterpret_cast<const int32_t *>(targetShapeMem.GetPtr());
        targetShape.assign(targetShapeData, targetShapeData + targetShapeMem.getStaticDims()[0]);
    }
    if (broadcastType == EXPLICIT && !constMap[AXES_MAPPING_IDX]) {
        const auto& axesMapMem = getParentEdgesAtPort(AXES_MAPPING_IDX)[0]->getMemory();
        const int32_t* axesMapData = reinterpret_cast<const int32_t *>(axesMapMem.GetPtr());
        axesMapping.assign(axesMapData, axesMapData + axesMapMem.getStaticDims()[0]);
    }

    const auto& srcDims = getParentEdgesAtPort(INPUT_DATA_IDX)[0]->getMemory().GetShape().getStaticDims();
    repeats.assign(targetShape.begin(), targetShape.end());
    const auto ndims = repeats.size();

    auto srcBlockedDims = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getBlockDims();
    auto dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getBlockDims();

    if (broadcastType == NUMPY) {
        for (size_t i = 0lu; i < srcDims.size(); i++) {
            repeats[ndims - 1lu - i] /= srcDims[srcDims.size() - 1lu - i];
        }
    } else if (broadcastType == EXPLICIT) {
        for (size_t i = 0; i < getInputShapeAtPort(AXES_MAPPING_IDX).getDims()[0]; i++) {
            repeats[axesMapping[i]] /= srcDims[i];
        }

        SizeVector newSrcBlockedDims = SizeVector(dstBlockedDims.size(), 1);
        for (size_t i = 0; i < getInputShapeAtPort(AXES_MAPPING_IDX).getDims()[0]; i++) {
            newSrcBlockedDims[axesMapping[i]] = srcBlockedDims[i];
        }
        srcBlockedDims = newSrcBlockedDims;
    }

    optimizedCase = prepareOptimizedParams(this, srcBlockedDims, dstBlockedDims);
}

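createPrimitive and prepareParams above follow the dynamic-shape lifecycle used throughout this patch: parameters are prepared eagerly only when all input shapes are already defined at load time, otherwise preparation is deferred until the first inference. A condensed, self-contained sketch of that control flow (all members are stand-ins, not the plugin API):

    #include <vector>

    // Condensed sketch of the create/prepare split these nodes use.
    struct NodeSketch {
        std::vector<size_t> lastDims;
        bool paramsReady = false;

        bool inputShapesDefined() const { return !lastDims.empty(); }
        void prepareParams() { paramsReady = true; }  // build kernels/params

        void createPrimitive() {
            if (inputShapesDefined()) {  // static case: prepare once at load time
                if (!paramsReady)
                    prepareParams();
            }
            // dynamic case: prepareParams() is deferred until shapes are known,
            // i.e. right before the first execute() call
        }
    };
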
bool MKLDNNBroadcastNode::needShapeInfer() const {
    needPrepareParamsVar = true;
    if (inputShapesModified()) {
        return true;
    }

    if (!constMap[TARGET_SHAPE_IDX]) {
        if (targetShape.empty()) {
            return true;
        }
        const int32_t* targetShapeData = reinterpret_cast<const int32_t *>(getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory().GetPtr());
        for (size_t i = 0lu; i < targetShape.size(); i++) {
            if (targetShape[i] != targetShapeData[i]) {
                return true;
            }
        }
    }
    if (broadcastType == EXPLICIT && !constMap[AXES_MAPPING_IDX]) {
        if (axesMapping.empty()) {
            return true;
        }
        const int32_t* axesMappingData = reinterpret_cast<const int32_t *>(getParentEdgesAtPort(AXES_MAPPING_IDX)[0]->getMemory().GetPtr());
        for (size_t i = 0lu; i < axesMapping.size(); i++) {
            if (axesMapping[i] != axesMappingData[i]) {
                return true;
            }
        }
    }
    needPrepareParamsVar = false;
    return false;
}

std::vector<VectorDims> MKLDNNBroadcastNode::shapeInfer() const {
    ngraph::OutputVector inputsForShapeInfer {
        std::make_shared<ov::op::v0::Parameter>(opToShapeInfer->get_input_element_type(INPUT_DATA_IDX),
                                                getParentEdgesAtPort(INPUT_DATA_IDX)[0]->getMemory().GetShape().toPartialShape()),
        std::make_shared<ov::op::v0::Constant>(ov::element::Type_t::i32,
                                               getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory().GetShape().getStaticDims(),
                                               getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory().GetPtr())
    };
    if (opToShapeInfer->get_input_size() > AXES_MAPPING_IDX) {
        inputsForShapeInfer.push_back(std::make_shared<ov::op::v0::Constant>(ov::element::Type_t::i32,
                                                                             getParentEdgesAtPort(AXES_MAPPING_IDX)[0]->getMemory().GetShape().getStaticDims(),
                                                                             getParentEdgesAtPort(AXES_MAPPING_IDX)[0]->getMemory().GetPtr()));
    }
    const auto localShapeInferOp = opToShapeInfer->clone_with_new_inputs(inputsForShapeInfer);

    localShapeInferOp->validate_and_infer_types();

    std::vector<VectorDims> newOutputShapes(outputShapes.size());
    for (size_t i = 0lu; i < newOutputShapes.size(); i++) {
        const auto &partShape = localShapeInferOp->get_output_partial_shape(i);
        newOutputShapes[i] = partShape.get_shape();
    }

    return newOutputShapes;
}

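shapeInfer above delegates to core nGraph shape inference by cloning the original op with the runtime target-shape (and axes-mapping) buffers wrapped as Constants. In isolation the same trick might look like this sketch, assuming the nGraph headers of this revision; it is illustrative, not the node's exact code:

    #include <memory>
    #include <vector>
    #include <ngraph/ngraph.hpp>  // header layout is an assumption for this era

    // Infer a Broadcast output shape by building a throwaway subgraph with the
    // runtime target shape baked in as a Constant.
    ngraph::Shape inferBroadcastShape(const ngraph::PartialShape& dataShape,
                                      const std::vector<int32_t>& targetShape) {
        auto data = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, dataShape);
        auto shape = std::make_shared<ngraph::op::Constant>(
            ngraph::element::i32, ngraph::Shape{targetShape.size()}, targetShape);
        auto bcast = std::make_shared<ngraph::op::v1::Broadcast>(data, shape);
        bcast->validate_and_infer_types();  // runs core shape inference
        return bcast->get_output_partial_shape(0).get_shape();
    }
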
void MKLDNNBroadcastNode::execute(mkldnn::stream strm) {
    size_t shape_size = (getParentEdgeAt(BROADCAST_SHAPE)->getMemory().getStaticDims())[0];
    SizeVector dst_dims = getChildEdgeAt(0)->getMemory().getStaticDims();
    SizeVector src_dims = getParentEdgeAt(BROADCAST_INPUT)->getMemory().getStaticDims();
    if (optimizedCase) {
        optimizedExecute(getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(), getChildEdgeAt(0)->getMemoryPtr());
    } else {
        plainExecute(strm);
    }
}

    auto srcDesc = getParentEdgeAt(BROADCAST_INPUT)->getMemory().GetDescWithType<BlockedMemoryDesc>();
    SizeVector srcStrides = srcDesc->getStrides();
    size_t data_size = srcDesc->getPrecision().size();
void MKLDNNBroadcastNode::plainExecute(mkldnn::stream strm) {
    VectorDims srcDims = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getStaticDims();
    const auto& dstDims = getChildEdgeAt(0)->getMemory().getStaticDims();
    const auto& dataSrcRank = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().GetShape().getRank();
    const auto& dataDstRank = getChildEdgeAt(0)->getMemory().GetShape().getRank();

    if (!src_dims.size())
        src_dims = SizeVector(1, 1);
    auto srcDesc = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().GetDescWithType<BlockedMemoryDesc>();
    VectorDims srcStrides = srcDesc->getStrides();
    const size_t dataSize = srcDesc->getPrecision().size();

    if (!dataSrcRank)
        srcDims = VectorDims(1, 1);
    if (!srcStrides.size())
        srcStrides = SizeVector(1, 1);

    if (dst_dims.size() != shape_size) {
        IE_THROW() << "Output tensor dimension mismatch";
    }

    if (src_dims.size() > dst_dims.size()) {
        IE_THROW() << "Output tensor dimension is smaller than input tensor dimension";
    }
        srcStrides = VectorDims(1, 1);

    auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
    InferenceEngine::SizeVector dstStrides = dstDesc->getStrides();
    InferenceEngine::SizeVector src_aligned(dst_dims.size());
    InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size());
    size_t prefix_size = dst_dims.size() - src_dims.size();
    for (size_t i = 0; i < dst_dims.size(); i++) {
        if (i < prefix_size) {
            src_aligned[i] = 1;
            srcStrides_aligned[i] = srcStrides[0];
    VectorDims dstStrides = dstDesc->getStrides();
    VectorDims srcAligned(dataDstRank);
    VectorDims srcStridesAligned(dataDstRank);
    const size_t prefixSize = dataDstRank - dataSrcRank;
    for (size_t i = 0lu; i < dataDstRank; i++) {
        if (i < prefixSize) {
            srcAligned[i] = 1;
            srcStridesAligned[i] = srcStrides[0];
        } else {
            src_aligned[i] = src_dims[i - prefix_size];
            srcStrides_aligned[i] = srcStrides[i - prefix_size];
            srcAligned[i] = srcDims[i - prefixSize];
            srcStridesAligned[i] = srcStrides[i - prefixSize];
        }
    }

    size_t work_amount_dst = dstStrides[0] * dst_dims[0];
    const auto *src_data = reinterpret_cast<const uint8_t *>(getParentEdgeAt(BROADCAST_INPUT)->getMemoryPtr()->GetPtr());
    auto *dst_data = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());
    const size_t workAmountDst = dstStrides[0] * dstDims[0];
    const auto *srcData = reinterpret_cast<const uint8_t *>(getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr()->GetPtr());
    auto *dstData = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());

    parallel_nt(0, [&](const int ithr, const int nthr) {
        size_t i, src_idx, start = 0, end = 0;
        SizeVector counters(dst_dims.size(), 0);
        splitter(work_amount_dst, nthr, ithr, start, end);
        for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) {
            counters[j] = i % dst_dims[j];
            i /= dst_dims[j];
        size_t i = 0lu, srcIdx = 0lu, start = 0lu, end = 0lu;
        VectorDims counters(dataDstRank, 0);
        splitter(workAmountDst, nthr, ithr, start, end);
        for (int j = dataDstRank - 1, i = start; j >= 0; j--) {
            counters[j] = i % dstDims[j];
            i /= dstDims[j];
        }
        for (size_t iwork = start * data_size; iwork < end * data_size; iwork += data_size) {
            for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
                src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
        for (size_t iwork = start * dataSize; iwork < end * dataSize; iwork += dataSize) {
            for (i = 0lu, srcIdx = 0lu; i < dataDstRank; ++i)
                srcIdx += counters[i] ? ((counters[i] % srcAligned[i]) * srcStridesAligned[i]) : 0;

            cpu_memcpy(&dst_data[iwork], &src_data[src_idx * data_size], data_size);
            cpu_memcpy(&dstData[iwork], &srcData[srcIdx * dataSize], dataSize);

            for (int j = dst_dims.size() - 1; j >= 0; j--) {
                counters[j] = (counters[j] + 1) % dst_dims[j];
            for (int j = dataDstRank - 1; j >= 0; j--) {
                counters[j] = (counters[j] + 1) % dstDims[j];
                if (counters[j] != 0) break;
            }
        }

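plainExecute above enumerates destination elements with an odometer-style counter over the output dims and maps each one back to a source element by taking every digit modulo the right-aligned source dim; a dim of 1 always maps to index 0, which is exactly what produces the broadcast. The index arithmetic in isolation:

    #include <cstddef>
    #include <vector>

    // Map a destination coordinate (counters) to a flat source index, where
    // srcAligned[i] is the source dim right-aligned to the destination rank
    // (1 where the source was padded) and srcStrides the matching strides.
    size_t broadcastSrcIndex(const std::vector<size_t>& counters,
                             const std::vector<size_t>& srcAligned,
                             const std::vector<size_t>& srcStrides) {
        size_t idx = 0;
        for (size_t i = 0; i < counters.size(); ++i)
            idx += (counters[i] % srcAligned[i]) * srcStrides[i];  // dim 1 -> offset 0
        return idx;
    }

    // Advance the counter like an odometer over dstDims; false once it wraps.
    bool nextCounter(std::vector<size_t>& counters, const std::vector<size_t>& dstDims) {
        for (int j = static_cast<int>(dstDims.size()) - 1; j >= 0; j--) {
            counters[j] = (counters[j] + 1) % dstDims[j];
            if (counters[j] != 0) return true;  // no carry past this digit
        }
        return false;  // iteration finished
    }
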
@@ -4,29 +4,51 @@

#pragma once

#include <ie_common.h>
#include <mkldnn_node.h>
#include <string>
#include "common/tile_broadcast_utils.h"

#include <memory>
#include <string>
#include <vector>


namespace MKLDNNPlugin {

class MKLDNNBroadcastNode : public MKLDNNNode {
class MKLDNNBroadcastNode : public MKLDNNNode, public TileBroadcastCommon {
public:
    MKLDNNBroadcastNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
    MKLDNNBroadcastNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);

    void getSupportedDescriptors() override {};
    void getSupportedDescriptors() override;
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override {};
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    void executeDynamicImpl(mkldnn::stream strm) override {
        execute(strm);
    }
    bool created() const override;

    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
    static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;

protected:
    bool needPrepareParams() const override;
    void prepareParams() override;
    bool needShapeInfer() const override;
    std::vector<VectorDims> shapeInfer() const override;

private:
    static const size_t BROADCAST_INPUT = 0;
    static const size_t BROADCAST_SHAPE = 1;
    void plainExecute(mkldnn::stream strm);

    enum AutoBroadcastType {
        NUMPY,
        EXPLICIT
    };
    AutoBroadcastType broadcastType;

    static constexpr size_t INPUT_DATA_IDX = 0;
    static constexpr size_t TARGET_SHAPE_IDX = 1;
    static constexpr size_t AXES_MAPPING_IDX = 2;

    std::vector<int32_t> targetShape;
    std::vector<int32_t> axesMapping;

    std::string errorPrefix;
};

@@ -14,10 +14,6 @@ using namespace InferenceEngine;

bool MKLDNNCTCGreedyDecoderNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (isDynamicNgraphNode(op)) {
            errorMessage = "Doesn't support op with dynamic shapes";
            return false;
        }
        const auto greedyDecOp = ngraph::as_type_ptr<const ngraph::op::v0::CTCGreedyDecoder>(op);
        if (!greedyDecOp) {
            errorMessage = "Node is not an instance of the CTCGreedyDecoder operation from operation set v0.";
@@ -42,8 +38,10 @@ MKLDNNCTCGreedyDecoderNode::MKLDNNCTCGreedyDecoderNode(const std::shared_ptr<ngr
    if (getOriginalOutputsNumber() != 1)
        IE_THROW() << errorPrefix << "has invalid number of output edges: " << getOriginalOutputsNumber();

    if (op->get_input_shape(DATA_INDEX)[0] != op->get_input_shape(SEQUENCE_LENGTH_INDEX)[0] &&
            op->get_input_shape(DATA_INDEX)[1] != op->get_input_shape(SEQUENCE_LENGTH_INDEX)[1])
    const auto& dataDims = getInputShapeAtPort(DATA_INDEX).getDims();
    const auto& seqDims = getInputShapeAtPort(SEQUENCE_LENGTH_INDEX).getDims();

    if (!dimsEqualWeak(dataDims[0], seqDims[0]) || !dimsEqualWeak(dataDims[1], seqDims[1]))
        IE_THROW() << errorPrefix << "has invalid input shapes.";

    auto greedyDecOp = ngraph::as_type_ptr<const ngraph::op::v0::CTCGreedyDecoder>(op);
@@ -167,4 +165,18 @@ bool MKLDNNCTCGreedyDecoderNode::created() const {
    return getType() == CTCGreedyDecoder;
}

void MKLDNNCTCGreedyDecoderNode::executeDynamicImpl(dnnl::stream strm) {
    MKLDNNCTCGreedyDecoderNode::execute(strm);
}

void MKLDNNCTCGreedyDecoderNode::createPrimitive() {
    if (inputShapesDefined()) {
        updateLastInputDims();
    }
}

bool MKLDNNCTCGreedyDecoderNode::needPrepareParams() const {
    return false;
}

REG_MKLDNN_PRIM_FOR(MKLDNNCTCGreedyDecoderNode, CTCGreedyDecoder)

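The validation above switches from comparing concrete input shapes to dimsEqualWeak, so that an undefined (dynamic) dimension counts as compatible. The helper presumably behaves like this sketch; UNDEFINED_DIM stands in for the plugin's dynamic-dimension marker, and the real implementation lives in the plugin's shape utilities:

    #include <cstddef>
    #include <limits>

    // Weak dimension equality: unknown dims compare as equal to anything.
    constexpr size_t UNDEFINED_DIM = std::numeric_limits<size_t>::max();

    bool dimsEqualWeakSketch(size_t lhs, size_t rhs) {
        return lhs == UNDEFINED_DIM || rhs == UNDEFINED_DIM || lhs == rhs;
    }
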
@@ -15,12 +15,13 @@ public:

    void getSupportedDescriptors() override {};
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override {};
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;
    void executeDynamicImpl(dnnl::stream strm) override;
    bool needPrepareParams() const override;

    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

private:
    const size_t DATA_INDEX = 0lu;
    const size_t SEQUENCE_LENGTH_INDEX = 1lu;

@@ -14,10 +14,6 @@ using namespace InferenceEngine;

bool MKLDNNCTCGreedyDecoderSeqLenNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (isDynamicNgraphNode(op)) {
            errorMessage = "Doesn't support op with dynamic shapes";
            return false;
        }
        const auto greedyDecOp = ngraph::as_type_ptr<const ngraph::op::v6::CTCGreedyDecoderSeqLen>(op);
        if (!greedyDecOp) {
            errorMessage = "Node is not an instance of the CTCGreedyDecoderSeqLen operation from operation set v6.";
@@ -42,7 +38,9 @@ MKLDNNCTCGreedyDecoderSeqLenNode::MKLDNNCTCGreedyDecoderSeqLenNode(const std::sh
    if (getOriginalOutputsNumber() != 2)
        IE_THROW() << errorPrefix << "has invalid number of output edges: " << getOriginalOutputsNumber();

    if (op->get_input_shape(DATA_INDEX)[0] != op->get_input_shape(SEQUENCE_LENGTH_INDEX)[0])
    const auto& dataDims = getInputShapeAtPort(DATA_INDEX).getDims();
    const auto& seqDims = getInputShapeAtPort(SEQUENCE_LENGTH_INDEX).getDims();
    if (!dimsEqualWeak(dataDims[0], seqDims[0]))
        IE_THROW() << errorPrefix << "has invalid input shapes.";

    auto greedyDecOp = ngraph::as_type_ptr<const ngraph::op::v6::CTCGreedyDecoderSeqLen>(op);
@@ -170,4 +168,18 @@ bool MKLDNNCTCGreedyDecoderSeqLenNode::created() const {
    return getType() == CTCGreedyDecoderSeqLen;
}

void MKLDNNCTCGreedyDecoderSeqLenNode::createPrimitive() {
    if (inputShapesDefined()) {
        updateLastInputDims();
    }
}

void MKLDNNCTCGreedyDecoderSeqLenNode::executeDynamicImpl(dnnl::stream strm) {
    MKLDNNCTCGreedyDecoderSeqLenNode::execute(strm);
}

bool MKLDNNCTCGreedyDecoderSeqLenNode::needPrepareParams() const {
    return false;
}

REG_MKLDNN_PRIM_FOR(MKLDNNCTCGreedyDecoderSeqLenNode, CTCGreedyDecoderSeqLen)

@@ -15,9 +15,11 @@ public:

    void getSupportedDescriptors() override {};
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override {};
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;
    void executeDynamicImpl(dnnl::stream strm) override;
    bool needPrepareParams() const override;

    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

@@ -13,10 +13,6 @@ using namespace InferenceEngine;

bool MKLDNNCTCLossNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (isDynamicNgraphNode(op)) {
            errorMessage = "Doesn't support op with dynamic shapes";
            return false;
        }
        const auto ctcLossOp = ngraph::as_type_ptr<const ngraph::op::v4::CTCLoss>(op);
        if (!ctcLossOp) {
            errorMessage = "Node is not an instance of the CTCLoss operation from operation set v4.";
@@ -61,6 +57,14 @@ void MKLDNNCTCLossNode::initSupportedPrimitiveDescriptors() {
                         impl_desc_type::ref_any);
}

void MKLDNNCTCLossNode::createPrimitive() {
    if (inputShapesDefined()) {
        if (needPrepareParams())
            prepareParams();
        updateLastInputDims();
    }
}

void MKLDNNCTCLossNode::execute(mkldnn::stream strm) {
    StatusCode returnCode = OK;

@@ -15,12 +15,15 @@ public:

    void getSupportedDescriptors() override {};
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override {};
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;

    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

    void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); };
    bool needPrepareParams() const override { return false; };

private:
    bool ctcMergeRepeated;
    bool preprocessCollapseRepeated;

@@ -13,10 +13,6 @@ using namespace InferenceEngine;

bool MKLDNNEmbeddingBagOffsetSumNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (isDynamicNgraphNode(op)) {
            errorMessage = "Doesn't support op with dynamic shapes";
            return false;
        }
        const auto embBagOffsetSumOp = ngraph::as_type_ptr<const ngraph::op::v3::EmbeddingBagOffsetsSum>(op);
        if (!embBagOffsetSumOp) {
            errorMessage = "Node is not an instance of the EmbeddingBagOffsetsSum operation from opset v3.";
@@ -35,14 +31,11 @@ MKLDNNEmbeddingBagOffsetSumNode::MKLDNNEmbeddingBagOffsetSumNode(const std::shar
        IE_THROW(NotImplemented) << errorMessage;
    }

    if (op->get_input_shape(INDICES_IDX).size() != 1)
        IE_THROW() << "'" << _layerName << "' layer has indices data with invalid shape.";
    if (getInputShapeAtPort(INDICES_IDX).getRank() != 1ul)
        IE_THROW() << "'" << _layerName << "' layer has indices data with invalid rank.";

    if (op->get_input_shape(OFFSETS_IDX).size() != 1)
        IE_THROW() << "'" << _layerName << "' layer's offsets data has invalid shape.";

    _indicesLen = op->get_input_shape(INDICES_IDX)[0];
    _offsetsLen = op->get_input_shape(OFFSETS_IDX)[0];
    if (getInputShapeAtPort(OFFSETS_IDX).getRank() != 1ul)
        IE_THROW() << "'" << _layerName << "' layer's offsets data has invalid rank.";
}

void MKLDNNEmbeddingBagOffsetSumNode::initSupportedPrimitiveDescriptors() {
@@ -77,6 +70,20 @@ void MKLDNNEmbeddingBagOffsetSumNode::initSupportedPrimitiveDescriptors() {
    addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any);
}

void MKLDNNEmbeddingBagOffsetSumNode::createPrimitive() {
    if (inputShapesDefined()) {
        if (needPrepareParams())
            prepareParams();
        updateLastInputDims();
    }
}

void MKLDNNEmbeddingBagOffsetSumNode::prepareParams() {
    _indicesLen = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[0];
    _offsetsLen = getParentEdgesAtPort(OFFSETS_IDX)[0]->getMemory().getStaticDims()[0];
    MKLDNNEmbeddingBagSumNode::prepareParams(getParentEdgesAtPort(EMB_TABLE_IDX)[0]->getMemory().getStaticDims());
}

void MKLDNNEmbeddingBagOffsetSumNode::initFromInputs() {
    indicesData_ = reinterpret_cast<const int *>(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr());
    offsetsData_ = reinterpret_cast<const int *>(getParentEdgeAt(OFFSETS_IDX)->getMemoryPtr()->GetPtr());

@@ -19,12 +19,16 @@ public:

    void getSupportedDescriptors() override {};
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override {};
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;

    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

protected:
    void prepareParams() override;
    void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }

private:
    void initFromInputs() override;
    void getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) override;

@@ -13,10 +13,6 @@ using namespace InferenceEngine;

bool MKLDNNEmbeddingBagPackedSumNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (isDynamicNgraphNode(op)) {
            errorMessage = "Doesn't support op with dynamic shapes";
            return false;
        }
        const auto embBagPackedSumOp = ngraph::as_type_ptr<const ngraph::op::v3::EmbeddingBagPackedSum>(op);
        if (!embBagPackedSumOp) {
            errorMessage = "Node is not an instance of the EmbeddingBagPackedSum operation from opset v3.";
@@ -35,10 +31,8 @@ MKLDNNEmbeddingBagPackedSumNode::MKLDNNEmbeddingBagPackedSumNode(const std::shar
        IE_THROW(NotImplemented) << errorMessage;
    }

    if (op->get_input_shape(INDICES_IDX).size() != 2)
        IE_THROW() << "'" << _layerName << "' layer has indices data with invalid shape.";
    _batch = op->get_input_shape(INDICES_IDX)[0];
    _indicesPerBag = op->get_input_shape(INDICES_IDX)[1];
    if (getInputShapeAtPort(INDICES_IDX).getRank() != 2ul)
        IE_THROW() << "'" << _layerName << "' layer has indices data with invalid rank.";
}

void MKLDNNEmbeddingBagPackedSumNode::initSupportedPrimitiveDescriptors() {
@@ -70,6 +64,20 @@ void MKLDNNEmbeddingBagPackedSumNode::initSupportedPrimitiveDescriptors() {
    addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any);
}

void MKLDNNEmbeddingBagPackedSumNode::createPrimitive() {
    if (inputShapesDefined()) {
        if (needPrepareParams())
            prepareParams();
        updateLastInputDims();
    }
}

void MKLDNNEmbeddingBagPackedSumNode::prepareParams() {
    _batch = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[0];
    _indicesPerBag = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[1];
    MKLDNNEmbeddingBagSumNode::prepareParams(getParentEdgesAtPort(EMB_TABLE_IDX)[0]->getMemory().getStaticDims());
}

void MKLDNNEmbeddingBagPackedSumNode::initFromInputs() {
    _indices = reinterpret_cast<const int *>(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr());
}

@@ -19,12 +19,16 @@ public:

    void getSupportedDescriptors() override {};
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override {};
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;

    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

protected:
    void prepareParams() override;
    void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }

private:
    void initFromInputs() override;
    void getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) override;

@@ -34,11 +34,12 @@ MKLDNNEmbeddingBagSumNode::MKLDNNEmbeddingBagSumNode(
        if (op->get_input_shape(PER_SAMPLE_WEIGHTS_IDX) != op->get_input_shape(INDICES_IDX))
            IE_THROW() << logPrefix << "must have equal shapes for indices and per_sample_weights inputs.";
    }
}

    const auto& inDataDims = op->get_input_shape(EMB_TABLE_IDX);
void MKLDNNEmbeddingBagSumNode::prepareParams(const VectorDims& indexStaticShape) {
    _embDepth = 1lu;
    for (size_t i = 1lu; i < inDataDims.size(); i++) {
        _embDepth *= inDataDims[i];
    for (size_t i = 1lu; i < indexStaticShape.size(); i++) {
        _embDepth *= indexStaticShape[i];
    }
}

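prepareParams above recomputes _embDepth, the number of elements copied per looked-up index, as the product of the embedding-table dims after the first; for a table of shape {V, E1, E2} that is E1 * E2. As a standalone helper:

    #include <cstddef>
    #include <vector>

    // Elements copied per looked-up index: product of all table dims except
    // dim 0 (the vocabulary size).
    size_t embeddingDepth(const std::vector<size_t>& tableDims) {
        size_t depth = 1;
        for (size_t i = 1; i < tableDims.size(); i++)
            depth *= tableDims[i];
        return depth;
    }
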
@@ -35,6 +35,8 @@ protected:
                    int& weightsIdx,
                    bool& withWeights) = 0;

    void prepareParams(const VectorDims& indexStaticShape);

    template<typename T>
    void processData(const T* srcData, const T* weightsData, T* dstData,
                     const InferenceEngine::SizeVector& inDataDims, const InferenceEngine::SizeVector& outDataDims);

@@ -11,12 +11,16 @@
using namespace MKLDNNPlugin;
using namespace InferenceEngine;

void MKLDNNEmbeddingSegmentsSumNode::createPrimitive() {
    if (inputShapesDefined()) {
        if (needPrepareParams())
            prepareParams();
        updateLastInputDims();
    }
}

bool MKLDNNEmbeddingSegmentsSumNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (isDynamicNgraphNode(op)) {
            errorMessage = "Doesn't support op with dynamic shapes";
            return false;
        }
        const auto embBagSegSumOp = ngraph::as_type_ptr<const ngraph::op::v3::EmbeddingSegmentsSum>(op);
        if (!embBagSegSumOp) {
            errorMessage = "Node is not an instance of the EmbeddingSegmentsSum operation from opset v3.";
@@ -36,13 +40,13 @@ MKLDNNEmbeddingSegmentsSumNode::MKLDNNEmbeddingSegmentsSumNode(const std::shared
    }

    std::string errPrefix = std::string("EmbeddingSegmentsSum layer with name '") + _layerName + "' ";
    if (op->get_input_shape(INDICES_IDX).size() != 1)
        IE_THROW() << errPrefix << "has indices data with invalid shape: "
                   << op->get_input_shape(INDICES_IDX).size();
    if (getInputShapeAtPort(INDICES_IDX).getRank() != 1ul)
        IE_THROW() << errPrefix << "has indices data with invalid rank: "
                   << getInputShapeAtPort(INDICES_IDX).getRank();

    if (op->get_input_shape(SEGMENT_ID_IDX).size() != 1)
        IE_THROW() << errPrefix << "has invalid segmentID data shape: "
                   << op->get_input_shape(SEGMENT_ID_IDX).size();
    if (getInputShapeAtPort(SEGMENT_ID_IDX).getRank() != 1ul)
        IE_THROW() << errPrefix << "has invalid segmentID data rank: "
                   << getInputShapeAtPort(SEGMENT_ID_IDX).getRank();
}

void MKLDNNEmbeddingSegmentsSumNode::initSupportedPrimitiveDescriptors() {
@@ -78,6 +82,10 @@ void MKLDNNEmbeddingSegmentsSumNode::initSupportedPrimitiveDescriptors() {
    addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any);
}

void MKLDNNEmbeddingSegmentsSumNode::prepareParams() {
    MKLDNNEmbeddingBagSumNode::prepareParams(getParentEdgesAtPort(EMB_TABLE_IDX)[0]->getMemory().getStaticDims());
}

void MKLDNNEmbeddingSegmentsSumNode::initFromInputs() {
    indices_ = reinterpret_cast<const int *>(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr());
    indicesSize_ = getParentEdgeAt(INDICES_IDX)->getMemory().GetShape().getElementsCount();

@@ -19,12 +19,16 @@ public:

    void getSupportedDescriptors() override {};
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override {};
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;

    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

protected:
    void prepareParams() override;
    void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }

private:
    void initFromInputs() override;
    void getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) override;

@@ -270,11 +270,7 @@ private:

bool MKLDNNExtractImagePatchesNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (isDynamicNgraphNode(op)) {
            errorMessage = "Doesn't support op with dynamic shapes";
            return false;
        }
        const auto extImgPatcher = std::dynamic_pointer_cast<const ngraph::opset3::ExtractImagePatches>(op);
        auto extImgPatcher = ngraph::as_type_ptr<const ngraph::opset3::ExtractImagePatches>(op);
        if (!extImgPatcher) {
            errorMessage = "Only opset3 ExtractImagePatches operation is supported";
            return false;
@@ -302,21 +298,18 @@ MKLDNNExtractImagePatchesNode::MKLDNNExtractImagePatchesNode(const std::shared_p
    }

    errorPrefix = "ExtractImagePatches layer with name '" + op->get_friendly_name() + "' ";
    const auto extImgPatcher = std::dynamic_pointer_cast<const ngraph::opset3::ExtractImagePatches>(op);
    auto extImgPatcher = ngraph::as_type_ptr<const ngraph::opset3::ExtractImagePatches>(op);

    if (getOriginalInputsNumber() != 1 || getOriginalOutputsNumber() != 1)
    if (inputShapes.size() != 1 || outputShapes.size() != 1)
        IE_THROW() << errorPrefix << "has incorrect number of input or output edges!"
                   << " Input: " << getOriginalInputsNumber() << "; Output: " << getOriginalOutputsNumber();
                   << " Input: " << inputShapes.size() << "; Output: " << outputShapes.size();

    if (op->get_input_shape(0).size() != 4)
        IE_THROW() << errorPrefix << "must have 4D input tensor. Actual: " << op->get_input_shape(0).size();
    if (getInputShapeAtPort(0).getRank() != 4)
        IE_THROW() << errorPrefix << "must have 4D input tensor. Actual: " << getInputShapeAtPort(0).getRank();

    if (op->get_output_shape(0).size() != 4)
        IE_THROW() << errorPrefix << "must have 4D output tensor. Actual: " << op->get_output_shape(0).size();
    if (getOutputShapeAtPort(0).getRank() != 4)
        IE_THROW() << errorPrefix << "must have 4D output tensor. Actual: " << getOutputShapeAtPort(0).getRank();

    auto ksizes = extImgPatcher->get_sizes();
    auto strides = extImgPatcher->get_strides();
    auto rates = extImgPatcher->get_rates();
    if (extImgPatcher->get_auto_pad() == ngraph::op::PadType::VALID) {
        _auto_pad = ExtImgPatcherPadType::VALID;
    } else if (extImgPatcher->get_auto_pad() == ngraph::op::PadType::SAME_LOWER) {
@@ -327,80 +320,46 @@ MKLDNNExtractImagePatchesNode::MKLDNNExtractImagePatchesNode(const std::shared_p
        IE_THROW() << errorPrefix << "has unsupported pad type: " << extImgPatcher->get_auto_pad();
    }

    if (ksizes.size() != 2 || strides.size() != 2 || rates.size() != 2)
    _ksizes = extImgPatcher->get_sizes();
    _strides = extImgPatcher->get_strides();
    _rates = extImgPatcher->get_rates();
    if (_ksizes.size() != 2 || _strides.size() != 2 || _rates.size() != 2)
        IE_THROW() << errorPrefix << "must have the following attributes with shape {2}: sizes, strides, rates.";
    _ksizes.clear();
    _strides.clear();
    _rates.clear();
    for (const auto& x : ksizes)
        _ksizes.push_back(x);
    for (const auto& x : strides)
        _strides.push_back(x);
    for (const auto& x : rates)
        _rates.push_back(x);
}

    SizeVector in_dims = op->get_input_shape(0);
    _pad_left = 0;
    _pad_top = 0;
    jit_extract_image_patches_params jpp;
    jpp.need_padding = false;
    if (_auto_pad != ExtImgPatcherPadType::VALID) {
        const size_t iheight = in_dims[2];
        const size_t iwidth = in_dims[3];
        const int64_t ihStep = _ksizes[0] + (_rates[0] - 1) * (_ksizes[0] - 1);
        const int64_t iwStep = _ksizes[1] + (_rates[1] - 1) * (_ksizes[1] - 1);

        int64_t PW = (std::ceil(1.f * iwidth / _strides[1]) - 1) * _strides[1] + iwStep - iwidth;
        int64_t PH = (std::ceil(1.f * iheight / _strides[0]) - 1) * _strides[0] + ihStep - iheight;

        int64_t increment_sign = 0;
        if (_auto_pad == ExtImgPatcherPadType::SAME_LOWER) {
            increment_sign = 1;
        } else if (_auto_pad == ExtImgPatcherPadType::SAME_UPPER) {
            increment_sign = -1;
        }

        if ((PW > 0) && (PW < iwStep)) {
            _pad_left = static_cast<size_t>((PW + increment_sign * (PW % 2)) / 2);
            jpp.need_padding = true;
        }
        if ((PH > 0) && (PH < ihStep)) {
            _pad_top = static_cast<size_t>((PH + increment_sign * (PH % 2)) / 2);
            jpp.need_padding = true;
        }
void MKLDNNExtractImagePatchesNode::createPrimitive() {
    if (inputShapesDefined()) {
        if (needPrepareParams())
            prepareParams();
        updateLastInputDims();
    }
}

    jpp.IW = in_dims[3];
    SizeVector out_dims = op->get_output_shape(0);
    jpp.OH = out_dims[2];
    jpp.OW = out_dims[3];
    jpp.KH = _ksizes[0];
    jpp.KW = _ksizes[1];
    jpp.SH = _strides[0];
    jpp.SW = _strides[1];
    jpp.dtype_size = getOriginalInputPrecisionAtPort(0).size();
    jpp.block_size = 1;
void MKLDNNExtractImagePatchesNode::prepareParams() {
    const auto& srcMemPtr0 = getParentEdgeAt(0)->getMemoryPtr();
    const auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
    if (!srcMemPtr0 || !srcMemPtr0->GetPrimitivePtr())
        IE_THROW() << "Input memory was not allocated.";
    if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
        IE_THROW() << "Destination memory was not allocated.";
    if (getSelectedPrimitiveDescriptor() == nullptr)
        IE_THROW() << "Preferable primitive descriptor is not set.";

    if (mayiuse(x64::avx512_common)) {
        jpp.block_size = cpu_isa_traits<x64::avx512_common>::vlen / jpp.dtype_size;
        extract_image_patches_kernel.reset(new jit_extract_image_patches_kernel<x64::avx512_common>(jpp));
    } else if (mayiuse(x64::avx2)) {
        jpp.block_size = cpu_isa_traits<x64::avx2>::vlen / jpp.dtype_size;
        extract_image_patches_kernel.reset(new jit_extract_image_patches_kernel<x64::avx2>(jpp));
    } else if (mayiuse(x64::sse41)) {
        jpp.block_size = cpu_isa_traits<x64::sse41>::vlen / jpp.dtype_size;
        extract_image_patches_kernel.reset(new jit_extract_image_patches_kernel<x64::sse41>(jpp));
    const auto& in_dims = getParentEdgeAt(0)->getMemory().getStaticDims();
    const auto& out_dims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims();
    const auto prcSize = getOriginalInputPrecisionAtPort(0).size();
    if (mayiuse(x64::sse41)) {
        execPtr = std::make_shared<ExtractImagePatchesJitExecutor>(in_dims, out_dims, _ksizes, _strides, _rates, _auto_pad, prcSize);
    } else {
        execPtr = std::make_shared<ExtractImagePatchesRefExecutor>(in_dims, out_dims, _ksizes, _strides, _rates, _auto_pad, prcSize);
    }

    if (extract_image_patches_kernel)
        extract_image_patches_kernel->create_ker();
}

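prepareParams now selects an executor instead of compiling the kernel inline: a JIT-backed one when SSE4.1 or better is available, otherwise a plain reference one. The dispatch pattern reduced to its core (types hypothetical):

    #include <memory>

    // One abstract interface, a JIT-backed and a reference implementation,
    // chosen once per shape change.
    struct Executor { virtual void exec() = 0; virtual ~Executor() = default; };
    struct JitExecutor : Executor { void exec() override { /* run generated kernel */ } };
    struct RefExecutor : Executor { void exec() override { /* plain C++ loops */ } };

    std::shared_ptr<Executor> makeExecutor(bool hasSse41) {
        if (hasSse41)
            return std::make_shared<JitExecutor>();  // mayiuse(x64::sse41) in the node
        return std::make_shared<RefExecutor>();
    }
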
void MKLDNNExtractImagePatchesNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    precision = getOriginalInputPrecisionAtPort(0);
    const auto precision = getOriginalInputPrecisionAtPort(0);
    if (_supported_precisions_sizes.find(precision.size()) == _supported_precisions_sizes.end())
        IE_THROW() << errorPrefix << "has unsupported precision: " << precision.name();

@@ -410,90 +369,208 @@ void MKLDNNExtractImagePatchesNode::initSupportedPrimitiveDescriptors() {
}

void MKLDNNExtractImagePatchesNode::execute(mkldnn::stream strm) {
    const char *src_data = reinterpret_cast<const char *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
    char *dst_data = reinterpret_cast<char *>(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr());
    const size_t dtype_size = getOriginalInputPrecisionAtPort(0).size();

    const auto& inDims = getParentEdgeAt(0)->getMemory().getStaticDims();
    const size_t IC = inDims[1];
    const size_t IH = inDims[2];
    const size_t IW = inDims[3];

    const auto& outDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims();
    const size_t OB = outDims[0];
    const size_t OH = outDims[2];
    const size_t OW = outDims[3];

    const size_t KH = _ksizes[0], KW = _ksizes[1];
    const size_t SH = _strides[0], SW = _strides[1];
    const size_t RH = _rates[0], RW = _rates[1];
    const size_t PT = _pad_top, PL = _pad_left;

    const std::vector<size_t> istrides = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getStrides();
    const std::vector<size_t> ostrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType<BlockedMemoryDesc>()->getStrides();
    const std::vector<size_t> ostrides_partial = {ostrides[0], KW * IC * ostrides[1], IC * ostrides[1], ostrides[1]};

    if (extract_image_patches_kernel) {
        parallel_for4d(OB, KH, KW, IC, [&](const size_t ob, const size_t kh, const size_t kw, const size_t ic) {
            const int64_t ih_start = kh * RH - PT;
            const int64_t iw_start = kw * RW - PL;
            const size_t ih_lpad = ih_start >= 0 ? 0 : std::ceil(-1.f * ih_start / SH);
            const size_t iw_lpad = iw_start >= 0 ? 0 : std::ceil(-1.f * iw_start / SW);
            const size_t ih_hpad = std::ceil((IH - 1.f * ih_start) / SH) > OH ? OH : std::ceil((IH - 1.f * ih_start) / SH);
            const size_t iw_hpad = std::ceil((IW - 1.f * iw_start) / SW) > OW ? OW : std::ceil((IW - 1.f * iw_start) / SW);

            size_t dst_offset = ob * ostrides_partial[0] + kh * ostrides_partial[1] + kw * ostrides_partial[2] + ic * ostrides_partial[3];
            size_t src_offset = ob * istrides[0] + ic * istrides[1] + ih_start * istrides[2] + iw_start + ih_lpad * SH * IW;

            auto args = jit_extract_image_patches_args();
            args.src = src_data + src_offset * dtype_size;
            args.dst = dst_data + dst_offset * dtype_size;
            args.h_lo_pad = ih_lpad;
            args.h_hi_pad = ih_hpad;
            args.w_lo_pad = iw_lpad;
            args.w_hi_pad = iw_hpad;
            (*extract_image_patches_kernel)(&args);
        });
    if (execPtr) {
        auto src = getParentEdgeAt(0)->getMemoryPtr()->GetPtr();
        auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr();
        const auto inStrides = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>()->getStrides();
        const auto outStrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType<BlockedMemoryDesc>()->getStrides();
        execPtr->exec(src, dst, inStrides, outStrides);
    } else {
        parallel_for4d(OB, KH, KW, IC, [&](const size_t ob, const size_t kh, const size_t kw, const size_t ic) {
            const int64_t iw_start = kw * RW - PL;
            const int64_t ih_start = kh * RH - PT;
            const size_t ih_lpad = ih_start >= 0 ? 0 : std::ceil(-1.f * ih_start / SH);
            const size_t iw_lpad = iw_start >= 0 ? 0 : std::ceil(-1.f * iw_start / SW);
        IE_THROW() << "Can't execute extract image patches node. Primitive wasn't created";
    }
}

            const size_t ih_hpad = std::ceil((IH - 1.f * ih_start) / SH) > OH ? OH : std::ceil((IH - 1.f * ih_start) / SH);
            const size_t iw_hpad = std::ceil((IW - 1.f * iw_start) / SW) > OW ? OW : std::ceil((IW - 1.f * iw_start) / SW);
void MKLDNNExtractImagePatchesNode::executeDynamicImpl(mkldnn::stream strm) {
    return execute(strm);
}

            char *my_dst_ptr = dst_data +
                (ob * ostrides_partial[0] + kh * ostrides_partial[1] + kw * ostrides_partial[2] + ic * ostrides_partial[3]) * dtype_size;
            const char *my_src_ptr = src_data + (ob * istrides[0] + ic * istrides[1] + ih_start * istrides[2] + iw_start) * dtype_size;
void MKLDNNExtractImagePatchesNode::ExtractImagePatchesRefExecutor::executeReference(
        void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) const {
    const char* src_data = reinterpret_cast<const char*>(src);
    char* dst_data = reinterpret_cast<char*>(dst);

            size_t num_bytes_to_set = ih_lpad * OW * dtype_size;
    const std::vector<size_t> ostrides_partial = { ostrides[0], jpp.KW * IC * ostrides[1], IC * ostrides[1], ostrides[1] };

    parallel_for4d(OB, jpp.KH, jpp.KW, IC, [&](const size_t ob, const size_t kh, const size_t kw, const size_t ic) {
        const int64_t iw_start = kw * RW - PL;
        const int64_t ih_start = kh * RH - PT;
        const size_t ih_lpad = ih_start >= 0 ? 0 : std::ceil(-1.f * ih_start / jpp.SH);
        const size_t iw_lpad = iw_start >= 0 ? 0 : std::ceil(-1.f * iw_start / jpp.SW);

        const size_t ih_hpad = std::ceil((IH - 1.f * ih_start) / jpp.SH) > jpp.OH ? jpp.OH : std::ceil((IH - 1.f * ih_start) / jpp.SH);
        const size_t iw_hpad = std::ceil((jpp.IW - 1.f * iw_start) / jpp.SW) > jpp.OW ? jpp.OW : std::ceil((jpp.IW - 1.f * iw_start) / jpp.SW);

        char* my_dst_ptr = dst_data +
            (ob * ostrides_partial[0] + kh * ostrides_partial[1] + kw * ostrides_partial[2] + ic * ostrides_partial[3]) * jpp.dtype_size;
        const char* my_src_ptr = src_data + (ob * istrides[0] + ic * istrides[1] + ih_start * istrides[2] + iw_start) * jpp.dtype_size;

        size_t num_bytes_to_set = ih_lpad * jpp.OW * jpp.dtype_size;
        memset(my_dst_ptr, 0, num_bytes_to_set);
        my_dst_ptr += num_bytes_to_set;

        const char* src_ptr_h_stop = my_src_ptr + ih_hpad * jpp.SH * jpp.IW * jpp.dtype_size;
        for (const char* src_h_ptr = my_src_ptr + ih_lpad * jpp.SH * jpp.IW * jpp.dtype_size;
                src_h_ptr < src_ptr_h_stop; src_h_ptr += jpp.SH * jpp.IW * jpp.dtype_size) {
            num_bytes_to_set = iw_lpad * jpp.dtype_size;
            memset(my_dst_ptr, 0, num_bytes_to_set);
            my_dst_ptr += num_bytes_to_set;

            const char* src_ptr_h_stop = my_src_ptr + ih_hpad * SH * IW * dtype_size;
            for (const char *src_h_ptr = my_src_ptr + ih_lpad * SH * IW * dtype_size;
                    src_h_ptr < src_ptr_h_stop; src_h_ptr += SH * IW * dtype_size) {
                num_bytes_to_set = iw_lpad * dtype_size;
                memset(my_dst_ptr, 0, num_bytes_to_set);
                my_dst_ptr += num_bytes_to_set;

                const char* src_ptr_w_stop = src_h_ptr + iw_hpad * SW * dtype_size;
                for (const char* src_w_ptr = src_h_ptr + iw_lpad * SW * dtype_size;
                        src_w_ptr < src_ptr_w_stop; src_w_ptr += SW * dtype_size) {
                    num_bytes_to_set = dtype_size;
                    memcpy(my_dst_ptr, src_w_ptr, num_bytes_to_set);
                    my_dst_ptr += num_bytes_to_set;
                }
                num_bytes_to_set = (OW - iw_hpad) * dtype_size;
                memset(my_dst_ptr, 0, num_bytes_to_set);
            const char* src_ptr_w_stop = src_h_ptr + iw_hpad * jpp.SW * jpp.dtype_size;
            for (const char* src_w_ptr = src_h_ptr + iw_lpad * jpp.SW * jpp.dtype_size;
                    src_w_ptr < src_ptr_w_stop; src_w_ptr += jpp.SW * jpp.dtype_size) {
                num_bytes_to_set = jpp.dtype_size;
                memcpy(my_dst_ptr, src_w_ptr, num_bytes_to_set);
                my_dst_ptr += num_bytes_to_set;
            }
            num_bytes_to_set = (OH - ih_hpad) * OW * dtype_size;
            num_bytes_to_set = (jpp.OW - iw_hpad) * jpp.dtype_size;
            memset(my_dst_ptr, 0, num_bytes_to_set);
        });
            my_dst_ptr += num_bytes_to_set;
        }
        num_bytes_to_set = (jpp.OH - ih_hpad) * jpp.OW * jpp.dtype_size;
        memset(my_dst_ptr, 0, num_bytes_to_set);
    });
}

void MKLDNNExtractImagePatchesNode::ExtractImagePatchesJitExecutor::executeOptimizedGeneric(
        void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) const {
    const char* src_data = reinterpret_cast<const char*>(src);
    char* dst_data = reinterpret_cast<char*>(dst);
    const auto& jpp = pKernel->jpp;

    const std::vector<size_t> ostrides_partial = { ostrides[0], jpp.KW * IC * ostrides[1], IC * ostrides[1], ostrides[1] };

    parallel_for4d(OB, jpp.KH, jpp.KW, IC, [&](const size_t ob, const size_t kh, const size_t kw, const size_t ic) {
        const int64_t ih_start = kh * RH - PT;
        const int64_t iw_start = kw * RW - PL;
        const size_t ih_lpad = ih_start >= 0 ? 0 : std::ceil(-1.f * ih_start / jpp.SH);
        const size_t iw_lpad = iw_start >= 0 ? 0 : std::ceil(-1.f * iw_start / jpp.SW);
        const size_t ih_hpad = std::ceil((IH - 1.f * ih_start) / jpp.SH) > jpp.OH ? jpp.OH : std::ceil((IH - 1.f * ih_start) / jpp.SH);
        const size_t iw_hpad = std::ceil((jpp.IW - 1.f * iw_start) / jpp.SW) > jpp.OW ? jpp.OW : std::ceil((jpp.IW - 1.f * iw_start) / jpp.SW);

        size_t dst_offset = ob * ostrides_partial[0] + kh * ostrides_partial[1] + kw * ostrides_partial[2] + ic * ostrides_partial[3];
        size_t src_offset = ob * istrides[0] + ic * istrides[1] + ih_start * istrides[2] + iw_start + ih_lpad * jpp.SH * jpp.IW;

        auto args = jit_extract_image_patches_args();
        args.src = src_data + src_offset * jpp.dtype_size;
        args.dst = dst_data + dst_offset * jpp.dtype_size;
        args.h_lo_pad = ih_lpad;
        args.h_hi_pad = ih_hpad;
        args.w_lo_pad = iw_lpad;
        args.w_hi_pad = iw_hpad;
        (*pKernel)(&args);
    });
}

jit_extract_image_patches_params MKLDNNExtractImagePatchesNode::ExtractImagePatchesExecutor::fillJpp(
        const VectorDims& inDims,
        const VectorDims& outDims,
        const VectorDims& kSizes,
        const VectorDims& strides,
        const VectorDims& rates,
        const ExtImgPatcherPadType& padType,
        const size_t prcSize) {
    jit_extract_image_patches_params jpp{};

    IC = inDims[1];
    IH = inDims[2];
    jpp.IW = inDims[3];

    OB = outDims[0];
    jpp.OH = outDims[2];
    jpp.OW = outDims[3];

    jpp.KH = kSizes[0];
    jpp.KW = kSizes[1];

    jpp.SH = strides[0];
    jpp.SW = strides[1];

    RH = rates[0];
    RW = rates[1];

    PL = 0;
    PT = 0;
    jpp.need_padding = false;
    if (padType != ExtImgPatcherPadType::VALID) {
        const int64_t ihStep = kSizes[0] + (rates[0] - 1) * (kSizes[0] - 1);
        const int64_t iwStep = kSizes[1] + (rates[1] - 1) * (kSizes[1] - 1);

        int64_t PW = (std::ceil(1.f * jpp.IW / strides[1]) - 1) * strides[1] + iwStep - jpp.IW;
        int64_t PH = (std::ceil(1.f * IH / strides[0]) - 1) * strides[0] + ihStep - IH;

        int64_t increment_sign = 0;
        if (padType == ExtImgPatcherPadType::SAME_LOWER) {
            increment_sign = 1;
        } else if (padType == ExtImgPatcherPadType::SAME_UPPER) {
            increment_sign = -1;
        }

        if ((PW > 0) && (PW < iwStep)) {
            PL = static_cast<size_t>((PW + increment_sign * (PW % 2)) / 2);
            jpp.need_padding = true;
        }
        if ((PH > 0) && (PH < ihStep)) {
            PT = static_cast<size_t>((PH + increment_sign * (PH % 2)) / 2);
            jpp.need_padding = true;
        }
    }

    jpp.dtype_size = prcSize;
    if (mayiuse(x64::avx512_common)) {
        jpp.block_size = cpu_isa_traits<x64::avx512_common>::vlen / prcSize;
    } else if (mayiuse(x64::avx2)) {
        jpp.block_size = cpu_isa_traits<x64::avx2>::vlen / prcSize;
    } else if (mayiuse(x64::sse41)) {
        jpp.block_size = cpu_isa_traits<x64::sse41>::vlen / prcSize;
    } else {
        jpp.block_size = 1;
    }

    return jpp;
}

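fillJpp above reproduces the SAME_UPPER/SAME_LOWER rule: total padding is whatever makes the strided, dilated window cover the input, and the odd pixel goes to the front for SAME_LOWER or the back for SAME_UPPER. The per-axis "pad before" computation in isolation (a sketch under those assumptions):

    #include <cmath>
    #include <cstdint>

    // Padding before the data for one spatial axis of ExtractImagePatches;
    // the kernel is dilated by rate, matching ihStep/iwStep above.
    int64_t padBefore(int64_t inSize, int64_t kernel, int64_t stride, int64_t rate,
                      bool sameLower) {
        const int64_t step = kernel + (rate - 1) * (kernel - 1);  // dilated kernel
        const int64_t total =
            (static_cast<int64_t>(std::ceil(1.f * inSize / stride)) - 1) * stride + step - inSize;
        if (total <= 0 || total >= step)
            return 0;  // mirrors the (P > 0) && (P < step) guard above
        const int64_t sign = sameLower ? 1 : -1;  // SAME_LOWER rounds the odd pixel forward
        return (total + sign * (total % 2)) / 2;
    }
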
MKLDNNExtractImagePatchesNode::ExtractImagePatchesJitExecutor::ExtractImagePatchesJitExecutor(
|
||||
const VectorDims& inDims,
|
||||
const VectorDims& outDims,
|
||||
const VectorDims& kSizes,
|
||||
const VectorDims& strides,
|
||||
const VectorDims& rates,
|
||||
const ExtImgPatcherPadType& padType,
|
||||
const size_t prcSize) {
|
||||
auto jpp = fillJpp(inDims, outDims, kSizes, strides, rates, padType, prcSize);
|
||||
if (mayiuse(x64::avx512_common)) {
|
||||
pKernel.reset(new jit_extract_image_patches_kernel<x64::avx512_common>(jpp));
|
||||
} else if (mayiuse(x64::avx2)) {
|
||||
pKernel.reset(new jit_extract_image_patches_kernel<x64::avx2>(jpp));
|
||||
} else if (mayiuse(x64::sse41)) {
|
||||
pKernel.reset(new jit_extract_image_patches_kernel<x64::sse41>(jpp));
|
||||
} else {
|
||||
IE_THROW() << "Can't create jit extract image patches kernel";
|
||||
}
|
||||
|
||||
if (pKernel)
|
||||
pKernel->create_ker();
|
||||
}

void MKLDNNExtractImagePatchesNode::ExtractImagePatchesJitExecutor::exec(
        void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) {
    if (!pKernel)
        IE_THROW() << "Can't execute, kernel for extract image patches node is not compiled";
    executeOptimizedGeneric(src, dst, istrides, ostrides);
}

MKLDNNExtractImagePatchesNode::ExtractImagePatchesRefExecutor::ExtractImagePatchesRefExecutor(
    const VectorDims& inDims,
    const VectorDims& outDims,
    const VectorDims& kSizes,
    const VectorDims& strides,
    const VectorDims& rates,
    const ExtImgPatcherPadType& padType,
    const size_t prcSize) : jpp(fillJpp(inDims, outDims, kSizes, strides, rates, padType, prcSize)) {}

void MKLDNNExtractImagePatchesNode::ExtractImagePatchesRefExecutor::exec(
        void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) {
    executeReference(src, dst, istrides, ostrides);
}
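
// Byte sizes of the supported element precisions: 1 (e.g. I8/U8),
// 2 (e.g. BF16/I16) and 4 (e.g. FP32/I32).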
const std::set<size_t> MKLDNNExtractImagePatchesNode::_supported_precisions_sizes = {1, 2, 4};
@ -11,7 +11,6 @@
#include <vector>

namespace MKLDNNPlugin {

struct jit_extract_image_patches_params {
    size_t IW;
    size_t OH, OW;
@ -46,10 +45,13 @@ public:

    void getSupportedDescriptors() override {};
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override {};
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;

    void executeDynamicImpl(mkldnn::stream strm) override;
    void prepareParams() override;

    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

private:
@ -62,15 +64,68 @@ private:
    std::vector<size_t> _ksizes;
    std::vector<size_t> _strides;
    std::vector<size_t> _rates;
    size_t _pad_left;
    size_t _pad_top;
    std::shared_ptr<jit_uni_extract_image_patches_kernel> extract_image_patches_kernel;
    static const std::set<size_t> _supported_precisions_sizes;

    ExtImgPatcherPadType _auto_pad;
    InferenceEngine::Precision precision;

    std::string errorPrefix;

    struct ExtractImagePatchesExecutor {
        ExtractImagePatchesExecutor() = default;
        virtual void exec(void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) = 0;
        jit_extract_image_patches_params fillJpp(
            const VectorDims& inDims,
            const VectorDims& outDims,
            const VectorDims& kSizes,
            const VectorDims& strides,
            const VectorDims& rates,
            const ExtImgPatcherPadType& padType,
            const size_t prcSize);
        virtual ~ExtractImagePatchesExecutor() = default;

    protected:
        size_t IC = 0;
        size_t IH = 0;
        size_t OB = 0;
        size_t RH = 0;
        size_t RW = 0;
        size_t PT = 0;
        size_t PL = 0;
    };
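    // The protected members above (IC, IH, OB, RH, RW, PT, PL) are scratch
    // state written by fillJpp() and read directly by the executors, while
    // the remaining parameters travel through jit_extract_image_patches_params.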

    using executorPtr = std::shared_ptr<ExtractImagePatchesExecutor>;
    executorPtr execPtr = nullptr;

    struct ExtractImagePatchesJitExecutor : public ExtractImagePatchesExecutor {
        ExtractImagePatchesJitExecutor(
            const VectorDims& inDims,
            const VectorDims& outDims,
            const VectorDims& kSizes,
            const VectorDims& strides,
            const VectorDims& rates,
            const ExtImgPatcherPadType& padType,
            const size_t prcSize);
        void exec(void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) override;
        void executeOptimizedGeneric(void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) const;

    private:
        std::unique_ptr<jit_uni_extract_image_patches_kernel> pKernel;
    };

    struct ExtractImagePatchesRefExecutor : public ExtractImagePatchesExecutor {
        ExtractImagePatchesRefExecutor(
            const VectorDims& inDims,
            const VectorDims& outDims,
            const VectorDims& kSizes,
            const VectorDims& strides,
            const VectorDims& rates,
            const ExtImgPatcherPadType& padType,
            const size_t prcSize);
        void exec(void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) override;
        void executeReference(void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) const;

    private:
        jit_extract_image_patches_params jpp;
    };
};

} // namespace MKLDNNPlugin
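
A minimal sketch of how a caller such as prepareParams() could pick between the
two executors declared above; this is illustrative code for this header, not
part of the diff, and it assumes the same x64::sse41 gate that the JIT
executor's constructor requires:

    // Hypothetical selection logic: prefer the JIT executor whenever a
    // supported SIMD ISA is available, otherwise fall back to the reference one.
    if (mayiuse(x64::sse41)) {
        execPtr = std::make_shared<ExtractImagePatchesJitExecutor>(
            inDims, outDims, kSizes, strides, rates, padType, prcSize);
    } else {
        execPtr = std::make_shared<ExtractImagePatchesRefExecutor>(
            inDims, outDims, kSizes, strides, rates, padType, prcSize);
    }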
@ -261,6 +261,15 @@ const std::vector<impl_desc_type>& MKLDNNFullyConnectedNode::getPrimitivesPriori
        impl_desc_type::jit_sse42,
        impl_desc_type::ref,
    };

    // WA: brgemm kernel contains bug that may lead to segfault in case of added post-ops and unaligned number of channels
    size_t simdWidth = 16;
    auto inputDims = inputShapes[0].getDims();
    if (inputDims.back() != Shape::UNDEFINED_DIM && inputDims.back() % simdWidth == 0) {
        priorities.insert(priorities.begin() + 1, impl_desc_type::brgemm_avx512_amx);
        priorities.insert(priorities.begin() + 2, impl_desc_type::brgemm_avx512);
    }
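
    // Illustration (hypothetical shapes): with simdWidth == 16, a fully-connected
    // layer whose innermost dimension is 768 (768 % 16 == 0) gets the brgemm
    // candidates inserted, while one with innermost dimension 100 keeps the
    // default priority list.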

    for (const auto& impl : priorities) {
        if (std::find(implPriorities.begin(), implPriorities.end(), impl) == implPriorities.end())
            implPriorities.push_back(impl);
File diff suppressed because it is too large
@ -97,6 +97,7 @@ public:
    void createPrimitive() override;
    bool created() const override;
    void execute(mkldnn::stream strm) override;
    void executeDynamicImpl(mkldnn::stream strm) override;
    bool canBeInPlace() const override {
        return false;
    }
@ -104,83 +105,141 @@ public:

    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

    bool needShapeInfer() const override;
    std::vector<VectorDims> shapeInfer() const override;
    bool needPrepareParams() const override;
    void prepareParams() override;

private:
    // nearest neighbor
    void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
    void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
    void NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
    struct InterpolateAttrs {
        InterpolateMode mode;
        InterpolateCoordTransMode coordTransMode;
        InterpolateNearestMode nearestMode;
        bool antialias;
        float cubeCoeff;
        std::vector<int> padBegin;
        std::vector<int> padEnd;
        InferenceEngine::Precision inPrc;
        InferenceEngine::Precision outPrc;
        InterpolateLayoutType layout;
    } interpAttrs;

    // onnx linear
    void linearOnnxCF(int outCoord, float scale, int inShape, int outShape, int& index0, int& index1, float& weight0, float& weight1);
    void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
    void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
    void linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
    class InterpolateExecutor {
    public:
        InterpolateExecutor(const InterpolateAttrs& interpAttrs,
                            const VectorDims &srcDims,
                            const VectorDims &dstDims,
                            const std::vector<float> &dataScales);

        // cubic
        std::vector<float> getCubicCoeffs(float mantissa, float a);
        void cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
        void cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
        void cubicRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
        virtual void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) = 0;
        virtual ~InterpolateExecutor() = default;
        VectorDims getSrcDimPad5d() const { return srcDimPad5d; }

        // linear
        void linearInterpolation(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
                                 float fx, float fy, float fz, int OD, int OH, int OW, int kernel_width, bool antialias);
    private:
        void buildTblNN(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector<float>& dataScales,
                        InterpolateLayoutType layout, InterpolateNearestMode nearestMode);
        void buildTblLinearOnnx(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector<float>& dataScales,
                                InterpolateLayoutType layout);
        void buildTblLinear(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector<float>& dataScales, int kernel_width,
                            bool antialias);
        void buildTblCubic(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector<float>& dataScales, float cubicCoeff,
                           InterpolateLayoutType layout);

    void buildTblNN(SizeVector& srcDimPad5d, SizeVector& dstDim5d, std::vector<float>& dataScales, InterpolateLayoutType layout);
    void buildTblLinearOnnx(SizeVector& srcDimPad5d, SizeVector& dstDim5d, std::vector<float>& dataScales, InterpolateLayoutType layout);
    void buildTblLinear(SizeVector& srcDimPad5d, SizeVector& dstDim5d, std::vector<float>& dataScales, int kernel_width, bool antialias);
    void buildTblCubic(SizeVector& srcDimPad5d, SizeVector& dstDim5d, std::vector<float>& dataScales, float cubicCoeff, InterpolateLayoutType layout);
        float coordTransToInput(int outCoord, float scale, int inShape, int outShape) const;
        int nearestRound(float origin, bool isDownsample, InterpolateNearestMode nearestMode) const;
        void linearOnnxCF(int outCoord, float scale, int inShape, int outShape, int& index0, int& index1, float& weight0, float& weight1);
        std::vector<float> getCubicCoeffs(float mantissa, float a);

    void setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false);
    protected:
        InterpolateMode mode;
        InterpolateCoordTransMode coordTransMode;
        InterpolateLayoutType configured_for_layout;
        VectorDims srcDimPad5d, dstDim5d;
        InferenceEngine::Precision inputPrec, outputPrec;
        size_t srcDataSize, dstDataSize;
        int spatialDimSize;
        size_t dataRank;
        std::vector<int> indexTable;
    };
    std::shared_ptr<InterpolateExecutor> execPtr = nullptr;

    inline float coordTransToInput(int outCoord, float scale, int inShape, int outShape);
    inline int nearestRound(float origin, bool isDownsample);
    float getValue(const uint8_t *base, size_t offset, InferenceEngine::Precision prec);
    void setValue(uint8_t *base, size_t offset, float value, InferenceEngine::Precision prec);
    class InterpolateJitExecutor : public InterpolateExecutor {
    public:
        InterpolateJitExecutor(const InterpolateAttrs& interpAttrs,
                               const VectorDims &srcDims,
                               const VectorDims &dstDims,
                               const std::vector<float> &dataScales,
                               const mkldnn::primitive_attr &attr);

    SizeVector getPaddedInputShape();
    std::vector<float> getScales();
        void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) override;

    static const size_t DATA_ID = 0;
    static const size_t TARGET_SHAPE_ID = 1;
    static const size_t SCALES_ID = 2;
    static const size_t AXES_ID = 3;
    const int LINEAR_KERNEL = 2;
    const int CUBIC_GRID_LEN = 4;
    private:
        // nearest neighbor
        void NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
        void NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);

        // onnx linear
        void linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
        void linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);

        // cubic
        void cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
        void cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);

    private:
        std::shared_ptr<jit_uni_interpolate_kernel> interpolateKernel = nullptr;
    };

    class InterpolateRefExecutor : public InterpolateExecutor {
    public:
        InterpolateRefExecutor(const InterpolateAttrs& interpAttrs,
                               const VectorDims &srcDims,
                               const VectorDims &dstDims,
                               const std::vector<float> &_dataScales) :
                InterpolateExecutor(interpAttrs, srcDims, dstDims, _dataScales),
                antialias(interpAttrs.antialias), dataScales(_dataScales) {}

        void exec(const uint8_t *in_ptr_, uint8_t *out_ptr_) override;

    private:
        void NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);
        void linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW);

        void cubicRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW);
        void linearInterpolation(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW,
                                 float fx, float fy, float fz, int OD, int OH, int OW, int kernel_width, bool antialias);

        static float getValue(const uint8_t *base, size_t offset, InferenceEngine::Precision prec);
        static void setValue(uint8_t *base, size_t offset, float value, InferenceEngine::Precision prec);

    private:
        bool antialias;
        std::vector<float> dataScales;
    };

    void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false);

    static SizeVector getPaddedInputShape(const VectorDims &srcDims, const std::vector<int> &padBegin, const std::vector<int> &padEnd);
    std::vector<float> getScales(const VectorDims &srcDimPad, const VectorDims &dstDim);
    static size_t getSpatialDimsNum(const Dim rank);

    static constexpr size_t DATA_ID = 0;
    static constexpr size_t TARGET_SHAPE_ID = 1;
    static constexpr size_t SCALES_ID = 2;
    static constexpr size_t AXES_ID = 3;
    static constexpr int CUBIC_GRID_LEN = 4;

    InterpolateMode mode;
    InterpolateCoordTransMode coordTransMode = InterpolateCoordTransMode::half_pixel;
    bool antialias = false;
    std::vector<int> padBegin;
    std::vector<int> padEnd;
    bool hasPad = false;
    InterpolateNearestMode nearestMode = InterpolateNearestMode::round_prefer_floor;
    InterpolateShapeCalcMode shapeCalcMode;

    float cubeCoeff = -0.75;

    bool isAxesSpecified = false;
    // axes and scales from buffer, partial size.
    std::vector<int> axes;
    std::vector<float> scales;
    // target shape is dst dim, full size.
    SizeVector dstDim;
    SizeVector srcDim;
    SizeVector srcDimPad;
    int spatialDimSize = 1;

    mkldnn::primitive_attr attr;
    std::vector<MKLDNNMemoryPtr> PostOpsIntBlobMemory;

    InferenceEngine::Precision inputPrec, outputPrec;
    size_t srcDataSize = 0;
    size_t dstDataSize = 0;
    std::vector<float> lastScales;
    std::vector<int32_t> lastSizes;

    InterpolateLayoutType configured_for_layout = InterpolateLayoutType::planar;

    std::vector<int> indexTable;

    std::shared_ptr<jit_uni_interpolate_kernel> interpolateKernel = nullptr;
    VectorDims lastOutputDims;

    std::string errorPrefix;
};
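
For reference, InterpolateExecutor::coordTransToInput() with the default
half_pixel mode maps an output coordinate back into input space. A minimal
standalone sketch (assuming the half_pixel semantics of the Interpolate spec;
the function name and sample scale below are illustrative, not from this diff):

    // half_pixel: x_in = (x_out + 0.5) / scale - 0.5
    inline float halfPixelToInput(int outCoord, float scale) {
        return (outCoord + 0.5f) / scale - 0.5f;
    }
    // e.g. 2x upsampling (scale == 2.0f): outCoord 0 -> -0.25, outCoord 1 -> 0.25

The integer and fractional parts of this coordinate are what the buildTbl*()
helpers precompute into indexTable.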
Some files were not shown because too many files have changed in this diff