Merge remote-tracking branch 'github/master' into auto-batch-master

# Conflicts:
#	samples/cpp/benchmark_app/inputs_filling.cpp
#	samples/cpp/benchmark_app/remote_blobs_filling.cpp
#	src/plugins/auto/executable_network.cpp
#	src/tests/unit/auto/exec_network_get_metrics.cpp
myshevts 2021-12-17 16:39:50 +03:00
commit 9426db9b00
3635 changed files with 13486 additions and 6064 deletions


@@ -241,7 +241,7 @@ jobs:
. $(SETUPVARS) -pyver 3.8 && python3 -m pytest -s $(INSTALL_DIR)/tests/mo/unit_tests --junitxml=TEST-ModelOptimizer.xml
displayName: 'Model Optimizer UT'
continueOnError: false
enabled: false
enabled: true
- script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/ov_core_unit_tests --gtest_print_time=1 --gtest_filter=-*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml
workingDirectory: $(INSTALL_TEST_DIR)
@@ -334,7 +334,7 @@ jobs:
displayName: 'Samples Smoke Tests'
continueOnError: false
condition: eq(variables['CMAKE_BUILD_SHARED_LIBS'], 'ON')
enabled: false
enabled: true
- script: |
export DATA_PATH=$(MODELS_PATH)
@@ -353,7 +353,7 @@ jobs:
workingDirectory: $(LAYER_TESTS_DIR)
displayName: 'Layer Tests'
continueOnError: false
enabled: false
enabled: true
- task: PublishTestResults@2
condition: always()


@@ -35,7 +35,7 @@ jobs:
- checkout: none
- script: git -C ~/work/openvino checkout -m --recurse-submodules $(Build.SourceVersion)
- script: git -C ~/work/openvino checkout -m $(Build.SourceVersion) && git -C ~/work/openvino submodule update --init --recursive
displayName: checkout
# Should be after 'Install dependencies' because Git lfs is not installed
@@ -71,7 +71,7 @@ jobs:
./buildreleasenolto.sh
libinference_engine_preproc.so
MKLDNNPlugin
clDNNPlugin
ov_intel_gpu_plugin
clDNN_unit_tests64
gpuFuncTests
displayName: Build Lin


@@ -83,7 +83,7 @@ jobs:
displayName: 'Make dir'
- script: |
certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
curl -O https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat
call install_ib_console.bat
workingDirectory: $(WORK_DIR)
displayName: 'Install IncrediBuild'
@@ -117,9 +117,9 @@ jobs:
python -m pip install -r $(REPO_DIR)\tools\mo\requirements.txt
python -m pip install -r $(REPO_DIR)\tools\mo\requirements_dev.txt
rem Speed up build
certutil -urlcache -split -f https://github.com/Kitware/CMake/releases/download/v$(CMAKE_VERSION)/cmake-$(CMAKE_VERSION)-windows-x86_64.zip cmake-$(CMAKE_VERSION)-windows-x86_64.zip
powershell -command "Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v$(CMAKE_VERSION)/cmake-$(CMAKE_VERSION)-windows-x86_64.zip -OutFile cmake-$(CMAKE_VERSION)-windows-x86_64.zip"
powershell -command "Expand-Archive -Force cmake-$(CMAKE_VERSION)-windows-x86_64.zip"
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip
powershell -command "Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip -OutFile ninja-win.zip"
powershell -command "Expand-Archive -Force ninja-win.zip"
git clone https://github.com/google/gtest-parallel.git
workingDirectory: $(WORK_DIR)


@@ -59,7 +59,7 @@ jobs:
- script: |
rem Speed up build
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip
powershell -command "Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip -OutFile ninja-win.zip"
powershell -command "Expand-Archive -Force ninja-win.zip"
workingDirectory: $(WORK_DIR)
displayName: 'Install dependencies'


@@ -35,7 +35,7 @@ Jenkinsfile @openvinotoolkit/openvino-admins
/src/common/ @openvinotoolkit/openvino-ie-maintainers
/src/core/ @openvinotoolkit/openvino-ngraph-maintainers
/src/frontends/ @openvinotoolkit/openvino-ngraph-maintainers
/inference-engine/tests_deprecated/readers/ @openvinotoolkit/openvino-ngraph-maintainers
/src/tests_deprecated/readers/ @openvinotoolkit/openvino-ngraph-maintainers
# IE CPU:
/inference-engine/src/mkldnn_plugin/ @openvinotoolkit/openvino-ie-cpu-maintainers @openvinotoolkit/openvino-ie-cpu-developers
@@ -53,12 +53,12 @@ Jenkinsfile @openvinotoolkit/openvino-admins
/inference-engine/src/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers
/src/inference/include/ie/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers
/inference-engine/thirdparty/movidius/ @openvinotoolkit/openvino-ie-vpu-maintainers
/inference-engine/tests_deprecated/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests_deprecated/functional/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests_deprecated/behavior/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests/functional/plugin/myriad/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests/unit/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests_deprecated/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests_deprecated/functional/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests_deprecated/behavior/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests/functional/plugin/myriad/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests/unit/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests/unit/engines/vpu/ @openvinotoolkit/openvino-ie-vpu-maintainers @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/scripts/run_tests_myriad_multistick.sh @openvinotoolkit/openvino-ie-vpu-maintainers
# IE GNA:
@@ -70,10 +70,10 @@ Jenkinsfile @openvinotoolkit/openvino-admins
/src/inference/include/ie/multi-device/ @openvinotoolkit/openvino-ie-multi-maintainers
# IE Tests:
/inference-engine/tests/ @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests_deprecated/ @openvinotoolkit/openvino-ie-tests-maintainers
/inference-engine/tests/functional/inference_engine/ngraph_reader/ @openvinotoolkit/openvino-ie-tests-maintainers @openvinotoolkit/openvino-ngraph-maintainers
/inference-engine/tests/functional/inference_engine/transformations/ @openvinotoolkit/openvino-ie-tests-maintainers @openvinotoolkit/openvino-ngraph-maintainers
/src/tests/ @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests_deprecated/ @openvinotoolkit/openvino-ie-tests-maintainers
/src/tests/functional/inference_engine/ngraph_reader/ @openvinotoolkit/openvino-ie-tests-maintainers @openvinotoolkit/openvino-ngraph-maintainers
/src/tests/functional/inference_engine/transformations/ @openvinotoolkit/openvino-ie-tests-maintainers @openvinotoolkit/openvino-ngraph-maintainers
# Documentation:
/docs/ @openvinotoolkit/openvino-docs-maintainers


@@ -79,8 +79,20 @@ function(_ie_add_api_validator_post_build_step)
_ie_add_api_validator_post_build_step_recursive(TARGET ${API_VALIDATOR_TARGET})
# remove targets which were tested before
foreach(item IN LISTS VALIDATED_LIBRARIES)
foreach(target IN LISTS API_VALIDATOR_TARGETS)
list(FIND VALIDATED_LIBRARIES ${target} index)
if (NOT index EQUAL -1)
list(APPEND VALIDATED_TARGETS ${target})
endif()
if(TARGET "${target}")
get_target_property(orig_target ${target} ALIASED_TARGET)
list(FIND VALIDATED_LIBRARIES ${orig_target} index)
if (NOT index EQUAL -1)
list(APPEND VALIDATED_TARGETS ${target})
endif()
endif()
endforeach()
foreach(item IN LISTS VALIDATED_TARGETS)
list(REMOVE_ITEM API_VALIDATOR_TARGETS ${item})
endforeach()


@@ -2,7 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
#
set(FRONTEND_INSTALL_INCLUDE "runtime/include/ngraph/frontend")
set(FRONTEND_INSTALL_INCLUDE "runtime/include/")
set(FRONTEND_NAME_SUFFIX "_ov_frontend")
set(FRONTEND_NAMES "" CACHE INTERNAL "")
@@ -225,7 +225,7 @@ macro(ov_add_frontend)
if(OV_FRONTEND_LINKABLE_FRONTEND)
# install -dev part
install(DIRECTORY ${${TARGET_NAME}_INCLUDE_DIR}/${OV_FRONTEND_NAME}_frontend
install(DIRECTORY ${${TARGET_NAME}_INCLUDE_DIR}/
DESTINATION ${FRONTEND_INSTALL_INCLUDE}
COMPONENT core_dev
FILES_MATCHING PATTERN "*.hpp")


@@ -4,7 +4,7 @@
#pragma once
#include "common/frontend.hpp"
#include "openvino/frontend/frontend.hpp"
@OV_FRONTEND_DECLARATIONS@


@@ -66,22 +66,22 @@ ov_model_convert("${CMAKE_CURRENT_SOURCE_DIR}/src/core/tests"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/core"
onnx_out_files)
set(rel_path "inference-engine/tests/functional/plugin/shared/models")
set(rel_path "src/tests/functional/plugin/shared/models")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/func_tests/models"
ft_out_files)
set(rel_path "inference-engine/tests/functional/inference_engine/onnx_reader")
set(rel_path "src/tests/functional/inference_engine/onnx_reader")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_reader"
ie_onnx_out_files)
set(rel_path "inference-engine/tests/functional/inference_engine/ir_serialization")
set(rel_path "src/tests/functional/inference_engine/ir_serialization")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/ir_serialization"
ie_serialize_out_files)
set(rel_path "inference-engine/tests/unit/frontends/onnx_import/models")
set(rel_path "src/tests/unit/frontends/onnx_import/models")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_import"
ie_onnx_import_out_files)


@@ -2,12 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
#! [complex:transformation]
import logging as log
import numpy as np
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.graph.graph import Graph
from openvino.tools.mo.front.common.replacement import FrontReplacementSubgraph
from openvino.tools.mo.graph.graph import Graph
class Complex(FrontReplacementSubgraph):
@@ -41,4 +38,3 @@ class Complex(FrontReplacementSubgraph):
# change the connection so now all consumers of "complex_node" get data from input node of strided slice nodes
complex_node.out_port(0).get_connection().set_source(input_node_output_port)
#! [complex:transformation]


@@ -4,11 +4,11 @@
#! [complex_abs:transformation]
import numpy as np
from extensions.ops.elementwise import Pow
from extensions.ops.ReduceOps import ReduceSum
from mo.front.common.replacement import FrontReplacementOp
from mo.graph.graph import Graph, Node
from mo.ops.const import Const
from openvino.tools.mo.ops.elementwise import Pow
from openvino.tools.mo.ops.ReduceOps import ReduceSum
from openvino.tools.mo.front.common.replacement import FrontReplacementOp
from openvino.tools.mo.graph.graph import Graph, Node
from openvino.tools.mo.ops.const import Const
class ComplexAbs(FrontReplacementOp):


@@ -3,8 +3,7 @@
# ! [fft_ext:extractor]
from ...ops.FFT import FFT
from mo.front.extractor import FrontExtractorOp
from mo.utils.error import Error
from openvino.tools.mo.front.extractor import FrontExtractorOp
class FFT2DFrontExtractor(FrontExtractorOp):


@@ -2,9 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
#! [fft:operation]
from mo.front.common.partial_infer.elemental import copy_shape_infer
from mo.graph.graph import Node, Graph
from mo.ops.op import Op
from openvino.tools.mo.front.common.partial_infer.elemental import copy_shape_infer
from openvino.tools.mo.graph.graph import Graph
from openvino.tools.mo.ops.op import Op
class FFT(Op):


@@ -868,7 +868,7 @@ EXAMPLE_PATH = ../template_plugin/src \
../template_plugin/tests/functional/CMakeLists.txt \
../template_plugin/tests/functional/transformations \
../template_plugin/tests/functional/shared_tests_instances/ \
../../inference-engine/tests/functional/plugin/shared/include \
../../src/tests/functional/plugin/shared/include \
../snippets
# If the value of the EXAMPLE_PATH tag contains directories, you can use the


@@ -59,11 +59,14 @@ Framework-agnostic parameters:
--reverse_input_channels
Switch the input channels order from RGB to BGR (or
vice versa). Applied to original inputs of the model
if and only if a number of channels equals 3. Applied
after application of --mean_values and --scale_values
options, so numbers in --mean_values and
--scale_values go in the order of channels used in the
original model.
if and only if the number of channels equals 3.
When --mean_values/--scale_values are also specified,
the channel reversal is applied to the user's input
data first, so that the numbers in --mean_values and
--scale_values follow the channel order used in the
original model. In other words, if both options are
specified, the data flow in the model looks as follows:
Parameter -> ReverseInputChannels -> Mean/Scale -> the original body of the model
(see the numeric sketch after this options excerpt).
--log_level {CRITICAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET}
Logger level
--input INPUT Quoted list of comma-separated input nodes names with
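Below is a minimal NumPy sketch of the order described for --reverse_input_channels above. The input shape and the mean/scale numbers are invented for illustration, and the snippet only mimics what the inserted ReverseInputChannels and Mean/Scale operations compute; it is not the Model Optimizer implementation.

```py
import numpy as np

# Hypothetical user input in RGB order; shape is (channels, height, width).
image_rgb = np.random.rand(3, 224, 224).astype(np.float32)

# Illustrative values, written in the channel order of the original (BGR) model,
# which is how --mean_values/--scale_values are expected to be given.
mean_values = np.array([103.94, 116.78, 123.68], dtype=np.float32)
scale_values = np.array([57.21, 57.45, 57.73], dtype=np.float32)

# 1) ReverseInputChannels: RGB -> BGR, so the data matches the original model's channel order.
image_bgr = image_rgb[::-1, :, :]

# 2) Mean/Scale: applied after the reversal, so the indices of mean/scale line up with BGR.
preprocessed = (image_bgr - mean_values[:, None, None]) / scale_values[:, None, None]

print(preprocessed.shape)  # (3, 224, 224)
```

Because the reversal happens first, the mean/scale constants stay in the original model's channel order, exactly as the help text states.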


@@ -639,9 +639,9 @@ graph. Consider the extractor for the TensorFlow\* operation `Const` (refer to the file
`extensions/front/tf/const_ext.py`):
```py
from mo.front.extractor import FrontExtractorOp
from mo.front.tf.extractors.utils import tf_dtype_extractor, tf_tensor_shape, tf_tensor_content
from mo.ops.const import Const
from openvino.tools.mo.front.extractor import FrontExtractorOp
from openvino.tools.mo.front.tf.extractors.utils import tf_dtype_extractor, tf_tensor_shape, tf_tensor_content
from openvino.tools.mo.ops.const import Const
class ConstExtractor(FrontExtractorOp):
@@ -679,9 +679,9 @@ Consider another example with an extractor of ONNX\* operation `Constant` (refer
from onnx import numpy_helper
from onnx.numpy_helper import to_array
from mo.front.extractor import FrontExtractorOp
from mo.front.onnx.extractors.utils import onnx_attr
from mo.ops.const import Const
from openvino.tools.mo.front.extractor import FrontExtractorOp
from openvino.tools.mo.front.onnx.extractors.utils import onnx_attr
from openvino.tools.mo.ops.const import Const
class ConstantExtractor(FrontExtractorOp):
@@ -814,11 +814,11 @@ fusing of the sub-graph defining the [Mish](../../../ops/activation/Mish_4.md) a
operation:
```py
from extensions.front.Softplus_fusion import SoftplusFusion
from extensions.ops.activation_ops import Mish
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.front.subgraph_matcher import SubgraphMatch
from mo.graph.graph import Graph, rename_nodes
from openvino.tools.mo.front.Softplus_fusion import SoftplusFusion
from openvino.tools.mo.ops.activation_ops import Mish
from openvino.tools.mo.front.common.replacement import FrontReplacementSubgraph
from openvino.tools.mo.front.subgraph_matcher import SubgraphMatch
from openvino.tools.mo.graph.graph import Graph, rename_nodes
class MishFusion(FrontReplacementSubgraph):
@@ -886,12 +886,12 @@ transformation.
Consider an example transformation from the file `extensions/front/Pack.py`, which replaces the operation `Pack` from
TensorFlow\*:
```py
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementOp
from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Node, Graph, rename_nodes
from mo.ops.concat import Concat
from mo.ops.unsqueeze import Unsqueeze
from openvino.tools.mo.front.common.partial_infer.utils import int64_array
from openvino.tools.mo.front.common.replacement import FrontReplacementOp
from openvino.tools.mo.front.tf.graph_utils import create_op_with_const_inputs
from openvino.tools.mo.graph.graph import Node, Graph, rename_nodes
from openvino.tools.mo.ops.concat import Concat
from openvino.tools.mo.ops.unsqueeze import Unsqueeze
class Pack(FrontReplacementOp):
@@ -932,11 +932,11 @@ specification.
```py
import logging as log
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementPattern
from mo.graph.graph import Graph
from mo.ops.const import Const
from mo.utils.error import Error
from openvino.tools.mo.front.common.partial_infer.utils import int64_array
from openvino.tools.mo.front.common.replacement import FrontReplacementPattern
from openvino.tools.mo.graph.graph import Graph
from openvino.tools.mo.ops.const import Const
from openvino.tools.mo.utils.error import Error
class SqueezeNormalize(FrontReplacementPattern):
@@ -1200,13 +1200,13 @@ The example of the configuration file for this type of transformation is `extens
and the corresponding transformation file is `./extensions/front/YOLO.py`:
```py
from extensions.front.no_op_eraser import NoOpEraser
from extensions.front.standalone_const_eraser import StandaloneConstEraser
from extensions.ops.regionyolo import RegionYoloOp
from mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
from mo.graph.graph import Node, Graph
from mo.ops.result import Result
from mo.utils.error import Error
from openvino.tools.mo.front.no_op_eraser import NoOpEraser
from openvino.tools.mo.front.standalone_const_eraser import StandaloneConstEraser
from openvino.tools.mo.ops.regionyolo import RegionYoloOp
from openvino.tools.mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
from openvino.tools.mo.graph.graph import Node, Graph
from openvino.tools.mo.ops.result import Result
from openvino.tools.mo.utils.error import Error
class YoloRegionAddon(FrontReplacementFromConfigFileGeneral):


@@ -20,9 +20,9 @@ assume that we have already created the `CustomOp` class (inherited from `Op` class)
for this MXNet custom operation as described in the [Customize_Model_Optimizer](Customize_Model_Optimizer.md).
```py
from extension.ops.custom_op import CustomOp # implementation of the MO operation class
from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
from mo.front.extractor import MXNetCustomFrontExtractorOp
from openvino.tools.mo.ops.custom_op import CustomOp # implementation of the MO operation class
from openvino.tools.mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
from openvino.tools.mo.front.extractor import MXNetCustomFrontExtractorOp
class CustomProposalFrontExtractor(MXNetCustomFrontExtractorOp): # inherit from specific base class
op = 'MyCustomOp' # the value corresponding to the `op_type` value of the MXNet operation


@@ -40,8 +40,8 @@ operation `ProposalOp` which corresponds to `Proposal` operation described in th
document. Refer to the source code below for a detailed explanation of the extractor.
```py
from extensions.ops.proposal import ProposalOp
from mo.front.extractor import CaffePythonFrontExtractorOp
from openvino.tools.mo.ops.proposal import ProposalOp
from openvino.tools.mo.front.extractor import CaffePythonFrontExtractorOp
class ProposalPythonFrontExtractor(CaffePythonFrontExtractorOp):


@@ -430,7 +430,7 @@ PassConfig instance taken from pass::Manager is shared across all registered tra
## Transformations testing <a name="transformations_testing"></a>
If you are developing a new transformation inside a plugin, you need to add a test to the `template_plugin/tests/functional/transformations` folder.
We have two types of tests: nGraph reader tests located in `inference-engine/tests/functional/inference_engine/ngraph_reader` and transformation tests located in `inference-engine/tests/functional/inference_engine/transformations`
We have two types of tests: nGraph reader tests located in `src/tests/functional/inference_engine/ngraph_reader` and transformation tests located in `src/tests/functional/inference_engine/transformations`
Reader tests are IR based and test end-to-end conversion from IR to CNNNetwork. Transformation tests test single ngraph transformations or low-level functions that are used inside transformations.
The basic transformation test looks like this:


@@ -38,17 +38,10 @@ target_include_directories(interpreter_backend PUBLIC $<BUILD_INTERFACE:${CMAKE_
file(GLOB_RECURSE all_backends_src "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp")
add_clang_format_target(interpreter_backend_clang FOR_SOURCES ${all_backends_src})
# developer package
openvino_developer_export_targets(COMPONENT core TARGETS interpreter_backend)
install(TARGETS interpreter_backend
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
ARCHIVE DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL)
if(NOT BUILD_SHARED_LIBS)
install(TARGETS interpreter_backend
RUNTIME DESTINATION tests COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
ARCHIVE DESTINATION tests COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
LIBRARY DESTINATION tests COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL)
endif()
# install
ov_install_static_lib(interpreter_backend template)


@@ -1707,7 +1707,24 @@ bool evaluate(const shared_ptr<op::v0::Log>& op, const HostTensorVector& outputs
}
namespace ctc_loss_v4 {
template <element::Type_t t1, element::Type_t t2>
template <element::Type_t t1,
element::Type_t t2,
typename std::enable_if<!std::is_floating_point<typename element_type_traits<t1>::value_type>::value &&
!std::is_same<typename element_type_traits<t1>::value_type, bfloat16>::value &&
!std::is_same<typename element_type_traits<t1>::value_type, float16>::value,
bool>::type = true>
inline void evaluate(const shared_ptr<op::v4::CTCLoss>& op,
const HostTensorVector& outputs,
const HostTensorVector& inputs) {
OPENVINO_ASSERT(false, "The data type for logits is expected to be a floating point type. Got:", element::Type(t1));
}
template <element::Type_t t1,
element::Type_t t2,
typename std::enable_if<std::is_floating_point<typename element_type_traits<t1>::value_type>::value ||
std::is_same<typename element_type_traits<t1>::value_type, bfloat16>::value ||
std::is_same<typename element_type_traits<t1>::value_type, float16>::value,
bool>::type = true>
inline void evaluate(const shared_ptr<op::v4::CTCLoss>& op,
const HostTensorVector& outputs,
const HostTensorVector& inputs) {
@@ -1944,6 +1961,30 @@ bool evaluate(const shared_ptr<op::v0::RNNCell>& op, const HostTensorVector& out
return true;
}
template <element::Type_t ET>
bool evaluate(const shared_ptr<op::v0::LSTMCell>& op, const HostTensorVector& outputs, const HostTensorVector& inputs) {
using T = typename element_type_traits<ET>::value_type;
runtime::reference::lstm_cell<T>(inputs[0]->get_data_ptr<ET>(),
inputs[0]->get_shape(),
inputs[1]->get_data_ptr<ET>(),
inputs[1]->get_shape(),
inputs[2]->get_data_ptr<ET>(),
inputs[2]->get_shape(),
inputs[3]->get_data_ptr<ET>(),
inputs[3]->get_shape(),
inputs[4]->get_data_ptr<ET>(),
inputs[4]->get_shape(),
inputs[5]->get_data_ptr<ET>(),
inputs[5]->get_shape(),
outputs[0]->get_data_ptr<ET>(),
outputs[1]->get_data_ptr<ET>(),
op->get_activations()[0],
op->get_activations()[1],
op->get_activations()[2],
op->get_clip());
return true;
}
template <element::Type_t ET>
bool evaluate(const shared_ptr<op::v4::LSTMCell>& op, const HostTensorVector& outputs, const HostTensorVector& inputs) {
using T = typename element_type_traits<ET>::value_type;


@@ -20,6 +20,7 @@ NGRAPH_OP(Gelu, op::v0)
NGRAPH_OP(GRN, op::v0)
NGRAPH_OP(HardSigmoid, op::v0)
NGRAPH_OP(LRN, ngraph::op::v0)
NGRAPH_OP(LSTMCell, op::v0)
NGRAPH_OP(MVN, ngraph::op::v0)
NGRAPH_OP(NormalizeL2, op::v0)
NGRAPH_OP(PriorBox, ngraph::op::v0)


@@ -37,4 +37,3 @@ set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_REL
# ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
# POSSIBLE_PLUGINS ${TARGET_NAME})
# [cmake:plugin]
ov_install_static_lib(interpreter_backend tests)


@@ -0,0 +1,182 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include "openvino/opsets/opset7.hpp"
#include "openvino/opsets/opset1.hpp"
#include "base_reference_test.hpp"
using namespace reference_tests;
using namespace ov;
namespace {
struct EinsumParams {
std::vector<Tensor> inputs;
std::string equation;
Tensor expectedResult;
std::string testcaseName;
};
struct Builder : ParamsBuilder<EinsumParams> {
REFERENCE_TESTS_ADD_SET_PARAM(Builder, inputs);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, equation);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, expectedResult);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName);
};
class ReferenceEinsumTest : public testing::TestWithParam<EinsumParams>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateModel(params);
for (const auto& input_tensor : params.inputs) {
inputData.push_back(input_tensor.data);
}
refOutData = {params.expectedResult.data};
}
static std::string getTestCaseName(const testing::TestParamInfo<EinsumParams>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "iType=" << param.inputs[0].type;
result << "_iShape=" << param.inputs[0].shape;
result << "_equation=" << param.equation;
result << "_eType=" << param.expectedResult.type;
result << "_eShape=" << param.expectedResult.shape;
if (param.testcaseName != "") {
result << "_=" << param.testcaseName;
}
return result.str();
}
private:
static std::shared_ptr<Model> CreateModel(const EinsumParams& params) {
OutputVector output_vector;
ParameterVector param_vector;
for (const auto& input_tensor : params.inputs) {
auto param = std::make_shared<opset1::Parameter>(input_tensor.type, input_tensor.shape);
output_vector.push_back(param);
param_vector.push_back(param);
}
const auto einsum = std::make_shared<opset7::Einsum>(output_vector, params.equation);
const auto f = std::make_shared<Model>(OutputVector{einsum}, param_vector);
return f;
}
};
TEST_P(ReferenceEinsumTest, CompareWithRefs) {
Exec();
}
template <element::Type_t ET>
std::vector<EinsumParams> generateParams() {
using T = typename element_type_traits<ET>::value_type;
std::vector<EinsumParams> params {
Builder {}
.inputs({{ET, {1, 2}, std::vector<T>{1, 2}},
{ET, {3, 4}, std::vector<T>{3, 4, 5, 6,
7, 8, 9, 10,
11, 12, 13, 14}}})
.equation("ab,cd->abcd")
.expectedResult({ET, {1, 2, 3, 4}, std::vector<T>{3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 6, 8, 10, 12,
14, 16, 18, 20, 22, 24, 26, 28}})
.testcaseName("einsum_no_reduction"),
Builder {}
.inputs({{ET, {1, 2, 3}, std::vector<T>{1, 2, 3, 4, 5, 6}}})
.equation("ijk->kij")
.expectedResult({ET, {3, 1, 2}, std::vector<T>{1, 4, 2, 5, 3, 6}})
.testcaseName("einsum_transpose"),
Builder {}
.inputs({{ET, {2, 3}, std::vector<T>{1, 2, 3, 4, 5, 6}}})
.equation("ab->a")
.expectedResult({ET, {2}, std::vector<T>{6, 15}})
.testcaseName("einsum_reduce"),
Builder {}
.inputs({{ET, {2, 3}, std::vector<T>{1, 2, 3, 4, 5, 6}},
{ET, {3, 2}, std::vector<T>{1, 2, 3, 4, 5, 6}}})
.equation("ab,bc->ac")
.expectedResult({ET, {2, 2}, std::vector<T>{22, 28, 49, 64}})
.testcaseName("einsum_matrix_multiplication"),
Builder {}
.inputs({{ET, {2, 4}, std::vector<T>{1, 3, 2, 7, 5, 6, 0, 1}},
{ET, {4, 3, 1}, std::vector<T>{1, 2, 3, 4, 5, 6, 5, 7, 3, 7, 9, 1}},
{ET, {4, 3}, std::vector<T>{4, 3, 1, 6, 4, 2, 2, 5, 3, 1, 9, 4}}})
.equation("ab,bcd,bc->ca")
.expectedResult({ET, {3, 2}, std::vector<T>{145, 171, 703, 231, 85, 91}})
.testcaseName("einsum_multiple_multiplication"),
Builder {}
.inputs({{ET, {2, 2, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}})
.equation("a...->...")
.expectedResult({ET, {2, 3}, std::vector<T>{4, 8, 4, 8, 5, 13}})
.testcaseName("einsum_ellipsis_one_input_reduction"),
Builder {}
.inputs({{ET, {2, 2, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}})
.equation("a...->...a")
.expectedResult({ET, {2, 3, 2}, std::vector<T>{1, 3, 3, 5, 2, 2, 7, 1, 5, 0, 6, 7}})
.testcaseName("einsum_ellipsis_one_input_transpose"),
Builder {}
.inputs({{ET, {2, 2, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}},
{ET, {1}, std::vector<T>{2}}})
.equation("ab...,...->ab...")
.expectedResult({ET, {2, 2, 3}, std::vector<T>{2, 6, 4, 14, 10, 12, 6, 10, 4, 2, 0, 14}})
.testcaseName("einsum_ellipsis_mul_by_1dscalar"),
Builder {}
.inputs({{ET, {1, 1, 4, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}},
{ET, {3, 4, 2, 1}, std::vector<T>{3, 1, 6, 2, 3, 10, 9, 8, 2, 9, 3, 2,
4, 2, 3, 1, 9, 1, 11, 4, 7, 2, 3, 1}}})
.equation("a...j,j...->a...")
.expectedResult({ET, {1, 4, 2, 4}, std::vector<T>{27, 85, 37, 66, 30, 58, 50, 8,
37, 123, 55, 83, 16, 48, 24, 30,
29, 83, 43, 52, 20, 92, 44, 24,
24, 96, 48, 30, 13, 67, 31, 15}})
.testcaseName("einsum_ellipsis_complex_mul"),
Builder {}
.inputs({{ET, {1, 3, 3}, std::vector<T>{1, 2, 3, 4, 5, 6, 7, 8, 9}}})
.equation("kii->ki")
.expectedResult({ET, {1, 3}, std::vector<T>{1, 5, 9}})
.testcaseName("einsum_diagonal"),
Builder {}
.inputs({{ET, {2, 3, 3, 2, 4}, std::vector<T>{4, 2, 5, 4, 5, 5, 1, 1, 3, 3, 1, 1, 2, 2, 4, 1, 3, 4,
4, 5, 1, 3, 1, 3, 1, 4, 3, 5, 4, 4, 5, 4, 4, 5, 4, 2,
2, 2, 3, 3, 1, 1, 4, 3, 4, 2, 2, 1, 1, 2, 3, 1, 1, 4,
2, 3, 1, 3, 4, 2, 5, 5, 3, 4, 3, 4, 5, 4, 4, 5, 1, 3,
4, 4, 5, 3, 1, 3, 2, 5, 3, 2, 5, 4, 4, 2, 4, 4, 1, 4,
4, 5, 4, 4, 4, 2, 3, 3, 4, 2, 4, 2, 5, 1, 3, 2, 4, 3,
5, 1, 2, 3, 1, 1, 2, 5, 1, 1, 2, 1, 4, 5, 3, 4, 1, 3,
3, 1, 3, 2, 4, 5, 1, 1, 5, 4, 5, 2, 2, 3, 3, 1, 2, 4}},
{ET, {3, 2, 1}, std::vector<T>{1, 4, 4, 5, 3, 3}}})
.equation("abbac,bad->ad")
.expectedResult({ET, {2, 1}, std::vector<T>{123, 129}})
.testcaseName("einsum_diagonal_with_matmul"),
};
return params;
}
std::vector<EinsumParams> generateCombinedParams() {
const std::vector<std::vector<EinsumParams>> generatedParams {
generateParams<element::Type_t::i32>(),
generateParams<element::Type_t::f32>(),
};
std::vector<EinsumParams> combinedParams;
for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_Einsum_With_Hardcoded_Refs, ReferenceEinsumTest,
testing::ValuesIn(generateCombinedParams()), ReferenceEinsumTest::getTestCaseName);
} // namespace


@@ -0,0 +1,246 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include "openvino/opsets/opset3.hpp"
#include "openvino/opsets/opset1.hpp"
#include "base_reference_test.hpp"
using namespace reference_tests;
using namespace ov;
namespace {
struct ExtractImagePatchesParams {
Tensor data;
Shape sizes;
Strides strides;
Shape rates;
op::PadType autoPad;
Tensor expectedResult;
std::string testcaseName;
};
struct Builder : ParamsBuilder<ExtractImagePatchesParams> {
REFERENCE_TESTS_ADD_SET_PARAM(Builder, data);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, sizes);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, strides);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, rates);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, autoPad);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, expectedResult);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName);
};
class ReferenceExtractImagePatchesTest : public testing::TestWithParam<ExtractImagePatchesParams>, public CommonReferenceTest {
public:
void SetUp() override {
auto params = GetParam();
function = CreateModel(params);
inputData = {params.data.data};
refOutData = {params.expectedResult.data};
}
static std::string getTestCaseName(const testing::TestParamInfo<ExtractImagePatchesParams>& obj) {
auto param = obj.param;
std::ostringstream result;
result << "dType=" << param.data.type;
result << "_dShape=" << param.data.shape;
result << "_sizes=" << param.sizes;
result << "_strides=" << param.strides;
result << "_rates=" << param.rates;
result << "_autoPad=" << param.autoPad;
result << "_eType=" << param.expectedResult.type;
result << "_eShape=" << param.expectedResult.shape;
if (param.testcaseName != "") {
result << "_=" << param.testcaseName;
}
return result.str();
}
private:
static std::shared_ptr<Model> CreateModel(const ExtractImagePatchesParams& params) {
const auto data = std::make_shared<opset1::Parameter>(params.data.type, params.data.shape);
const auto extrace_image_patches = std::make_shared<opset3::ExtractImagePatches>(data,
params.sizes,
params.strides,
params.rates,
params.autoPad);
const auto f = std::make_shared<Model>(extrace_image_patches, ParameterVector{data});
return f;
}
};
TEST_P(ReferenceExtractImagePatchesTest, CompareWithRefs) {
Exec();
}
template <element::Type_t ET>
std::vector<ExtractImagePatchesParams> generateParams() {
using T = typename element_type_traits<ET>::value_type;
std::vector<ExtractImagePatchesParams> params {
Builder {}
.data({ET, {1, 1, 10, 10}, std::vector<T>{
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
.sizes({3, 3})
.strides({5, 5})
.rates({1, 1})
.autoPad(op::PadType::VALID)
.expectedResult({ET, {1, 9, 2, 2}, std::vector<T>{
1, 6, 51, 56,
2, 7, 52, 57,
3, 8, 53, 58,
11, 16, 61, 66,
12, 17, 62, 67,
13, 18, 63, 68,
21, 26, 71, 76,
22, 27, 72, 77,
23, 28, 73, 78}}),
Builder {}
.data({ET, {1, 1, 10, 10}, std::vector<T>{
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
.sizes({4, 4})
.strides({8, 8})
.rates({1, 1})
.autoPad(op::PadType::VALID)
.expectedResult({ET, {1, 16, 1, 1}, std::vector<T>{
1, 2, 3, 4,
11, 12, 13, 14,
21, 22, 23, 24,
31, 32, 33, 34}}),
Builder {}
.data({ET, {1, 1, 10, 10}, std::vector<T>{
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
.sizes({4, 4})
.strides({9, 9})
.rates({1, 1})
.autoPad(op::PadType::SAME_UPPER)
.expectedResult({ET, {1, 16, 2, 2}, std::vector<T>{
0, 0, 0, 89,
0, 0, 81, 90,
0, 0, 82, 0,
0, 0, 83, 0,
0, 9, 0, 99,
1, 10, 91, 100,
2, 0, 92, 0,
3, 0, 93, 0,
0, 19, 0, 0,
11, 20, 0, 0,
12, 0, 0, 0,
13, 0, 0, 0,
0, 29, 0, 0,
21, 30, 0, 0,
22, 0, 0, 0,
23, 0, 0, 0}}),
Builder {}
.data({ET, {1, 1, 10, 10}, std::vector<T>{
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
.sizes({3, 3})
.strides({5, 5})
.rates({2, 2})
.autoPad(op::PadType::VALID)
.expectedResult({ET, {1, 9, 2, 2}, std::vector<T>{
1, 6, 51, 56,
3, 8, 53, 58,
5, 10, 55, 60,
21, 26, 71, 76,
23, 28, 73, 78,
25, 30, 75, 80,
41, 46, 91, 96,
43, 48, 93, 98,
45, 50, 95, 100}}),
Builder {}
.data({ET, {1, 2, 5, 5}, std::vector<T>{
1, 2, 3, 4, 5,
6, 7, 8, 9, 10,
11, 12, 13, 14, 15,
16, 17, 18, 19, 20,
21, 22, 23, 24, 25,
26, 27, 28, 29, 30,
31, 32, 33, 34, 35,
36, 37, 38, 39, 40,
41, 42, 43, 44, 45,
46, 47, 48, 49, 50}})
.sizes({2, 2})
.strides({3, 3})
.rates({1, 1})
.autoPad(op::PadType::VALID)
.expectedResult({ET, {1, 8, 2, 2}, std::vector<T>{
1, 4, 16, 19,
26, 29, 41, 44,
2, 5, 17, 20,
27, 30, 42, 45,
6, 9, 21, 24,
31, 34, 46, 49,
7, 10, 22, 25,
32, 35, 47, 50}}),
};
return params;
}
std::vector<ExtractImagePatchesParams> generateCombinedParams() {
const std::vector<std::vector<ExtractImagePatchesParams>> generatedParams {
generateParams<element::Type_t::i8>(),
generateParams<element::Type_t::i16>(),
generateParams<element::Type_t::i32>(),
generateParams<element::Type_t::i64>(),
generateParams<element::Type_t::u8>(),
generateParams<element::Type_t::u16>(),
generateParams<element::Type_t::u32>(),
generateParams<element::Type_t::u64>(),
generateParams<element::Type_t::bf16>(),
generateParams<element::Type_t::f16>(),
generateParams<element::Type_t::f32>(),
generateParams<element::Type_t::f64>(),
};
std::vector<ExtractImagePatchesParams> combinedParams;
for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_ExtractImagePatches_With_Hardcoded_Refs, ReferenceExtractImagePatchesTest,
testing::ValuesIn(generateCombinedParams()), ReferenceExtractImagePatchesTest::getTestCaseName);
} // namespace


@@ -4,7 +4,8 @@
#include <gtest/gtest.h>
#include "openvino/op/lstm_cell.hpp"
#include "openvino/opsets/opset4.hpp"
#include "openvino/opsets/opset1.hpp"
#include "base_reference_test.hpp"
using namespace reference_tests;
@@ -12,13 +13,6 @@ using namespace ov;
namespace {
struct LSTMCellParams {
LSTMCellParams(
int32_t batchSize, int32_t inputSize, int32_t hiddenSize, int32_t gatesCount,
const Tensor& X, const Tensor& W, const Tensor& R, const Tensor& H_t, const Tensor& C_t, const Tensor& B,
const Tensor& Ho, const Tensor& Co, const std::string& testcaseName = "") :
batchSize(batchSize), inputSize(inputSize), hiddenSize(hiddenSize), gatesCount(gatesCount),
X(X), W(W), R(R), H_t(H_t), C_t(C_t), B(B), Ho(Ho), Co(Co), testcaseName(testcaseName) {}
int32_t batchSize;
int32_t inputSize;
int32_t hiddenSize;
@@ -34,6 +28,22 @@ struct LSTMCellParams {
std::string testcaseName;
};
struct Builder : ParamsBuilder<LSTMCellParams> {
REFERENCE_TESTS_ADD_SET_PARAM(Builder, batchSize);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, inputSize);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, hiddenSize);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, gatesCount);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, X);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, W);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, R);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, H_t);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, C_t);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, B);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, Ho);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, Co);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName);
};
class ReferenceLSTMCellTest : public testing::TestWithParam<LSTMCellParams>, public CommonReferenceTest {
public:
void SetUp() override {
@@ -63,26 +73,24 @@ public:
result << "_hoType=" << param.Ho.type;
result << "_hoShape=" << param.Ho.shape;
result << "_coType=" << param.Co.type;
result << "_coShape=" << param.Co.shape;
if (param.testcaseName != "") {
result << "_coShape=" << param.Co.shape;
result << "_=" << param.testcaseName;
} else {
result << "_coShape=" << param.Co.shape;
}
return result.str();
}
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<op::v4::LSTMCell>(X,
std::make_shared<opset4::LSTMCell>(X,
H_t,
C_t,
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
@@ -107,15 +115,15 @@ public:
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<op::v4::LSTMCell>(X,
std::make_shared<opset4::LSTMCell>(X,
H_t,
C_t,
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
@@ -142,15 +150,15 @@ private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const float clip_threshold = 3.5f;
const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<op::v4::LSTMCell>(X,
std::make_shared<opset4::LSTMCell>(X,
H_t,
C_t,
W,
@@ -179,36 +187,130 @@ TEST_P(ReferenceLSTMCellTestBiasClip, CompareWithRefs) {
Exec();
}
class ReferenceLSTMCellV1Test : public ReferenceLSTMCellTest {
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<opset1::LSTMCell>(X,
H_t,
C_t,
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC),
op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC),
params.hiddenSize);
auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
return function;
}
};
class ReferenceLSTMCellV1TestBiasDefaultAttrs : public ReferenceLSTMCellTestBiasDefaultAttrs {
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<opset1::LSTMCell>(X,
H_t,
C_t,
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC),
op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC),
params.hiddenSize);
auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
return function;
}
};
class ReferenceLSTMCellV1TestBiasClip : public ReferenceLSTMCellTestBiasClip {
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const float clip_threshold = 3.5f;
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<opset1::LSTMCell>(X,
H_t,
C_t,
W,
R,
B,
params.hiddenSize,
op::LSTMWeightsFormat::IFCO,
std::vector<std::string>{"sigmoid", "tanh", "tanh"},
std::vector<float>{},
std::vector<float>{},
clip_threshold);
auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
return function;
}
};
TEST_P(ReferenceLSTMCellV1Test, CompareWithRefs) {
Exec();
}
TEST_P(ReferenceLSTMCellV1TestBiasDefaultAttrs, CompareWithRefs) {
Exec();
}
TEST_P(ReferenceLSTMCellV1TestBiasClip, CompareWithRefs) {
Exec();
}
template <element::Type_t ET>
std::vector<LSTMCellParams> generateParams() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
LSTMCellParams(
2, 3, 3, 4,
Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)),
Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}),
Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}),
"lstm_cell_zero_bias_default_attrs"),
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}))
.testcaseName("lstm_cell_zero_bias_default_attrs")
};
return params;
}
@@ -232,53 +334,56 @@ template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasDefaultAttrs() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
LSTMCellParams(
2, 3, 3, 4,
Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}),
Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.76665538549423218,
0.82509011030197144,
0.6479143500328064,
0.66586339473724365,
0.74838578701019287}),
Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
0.74838578701019287}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
1.1150213479995728,
1.4578367471694946,
1.0649888515472412,
0.93761754035949707,
1.3659683465957642}),
"lstm_cell_bias_default_attrs"),
1.3659683465957642}))
.testcaseName("lstm_cell_bias_default_attrs"),
};
return params;
}
@@ -302,53 +407,56 @@ template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasClip() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
LSTMCellParams(
2, 3, 3, 4,
Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}),
Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.76665538549423218,
0.82387429475784302,
0.6479143500328064,
0.66586339473724365,
0.74838578701019287}),
Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
0.74838578701019287}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
1.1150213479995728,
1.4510968923568726,
1.0649888515472412,
0.93761754035949707,
1.3659683465957642}),
"lstm_cell_bias_clip"),
1.3659683465957642}))
.testcaseName("lstm_cell_bias_clip"),
};
return params;
}
@ -376,4 +484,211 @@ INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTe
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTestBiasClip,
testing::ValuesIn(generateCombinedParamsBiasClip()), ReferenceLSTMCellTest::getTestCaseName);
} // namespace
template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsV1() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}))
.testcaseName("lstm_cell_v1_zero_bias_default_attrs")
};
return params;
}
std::vector<LSTMCellParams> generateCombinedParamsV1() {
const std::vector<std::vector<LSTMCellParams>> generatedParams {
generateParamsV1<element::Type_t::bf16>(),
generateParamsV1<element::Type_t::f16>(),
generateParamsV1<element::Type_t::f32>(),
generateParamsV1<element::Type_t::f64>(),
};
std::vector<LSTMCellParams> combinedParams;
for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasDefaultAttrsV1() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.76665538549423218,
0.82509011030197144,
0.6479143500328064,
0.66586339473724365,
0.74838578701019287}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
1.1150213479995728,
1.4578367471694946,
1.0649888515472412,
0.93761754035949707,
1.3659683465957642}))
.testcaseName("lstm_cell_v1_bias_default_attrs"),
};
return params;
}
std::vector<LSTMCellParams> generateCombinedParamsBiasDefaultAttrsV1() {
const std::vector<std::vector<LSTMCellParams>> generatedParams {
generateParamsBiasDefaultAttrsV1<element::Type_t::bf16>(),
generateParamsBiasDefaultAttrsV1<element::Type_t::f16>(),
generateParamsBiasDefaultAttrsV1<element::Type_t::f32>(),
generateParamsBiasDefaultAttrsV1<element::Type_t::f64>(),
};
std::vector<LSTMCellParams> combinedParams;
for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasClipV1() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.76665538549423218,
0.82387429475784302,
0.6479143500328064,
0.66586339473724365,
0.74838578701019287}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
1.1150213479995728,
1.4510968923568726,
1.0649888515472412,
0.93761754035949707,
1.3659683465957642}))
.testcaseName("lstm_cell_v1_bias_clip"),
};
return params;
}
std::vector<LSTMCellParams> generateCombinedParamsBiasClipV1() {
const std::vector<std::vector<LSTMCellParams>> generatedParams {
generateParamsBiasClipV1<element::Type_t::bf16>(),
generateParamsBiasClipV1<element::Type_t::f16>(),
generateParamsBiasClipV1<element::Type_t::f32>(),
generateParamsBiasClipV1<element::Type_t::f64>(),
};
std::vector<LSTMCellParams> combinedParams;
for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1Test,
testing::ValuesIn(generateCombinedParamsV1()), ReferenceLSTMCellV1Test::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasDefaultAttrs,
testing::ValuesIn(generateCombinedParamsBiasDefaultAttrsV1()), ReferenceLSTMCellV1Test::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasClip,
testing::ValuesIn(generateCombinedParamsBiasClipV1()), ReferenceLSTMCellV1Test::getTestCaseName);
} // namespace
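
The LSTMCell parameter refactor above replaces a long positional constructor with a fluent Builder. As a rough sketch of that pattern (illustrative only; the actual Builder in the test sources carries Tensor-typed fields and converts to LSTMCellParams), chained setters returning `*this` let each test case name only the fields it sets:

```cpp
// Illustrative sketch of the fluent-builder pattern adopted above; the real
// Builder lives in the LSTMCell test sources and holds Tensor-typed fields.
#include <string>
#include <utility>
#include <vector>

struct Params {
    int batchSize = 0;
    int hiddenSize = 0;
    std::string testcaseName;
};

struct Builder {
    // each setter returns *this, so fields can be chained in any order
    Builder& batchSize(int v) { params.batchSize = v; return *this; }
    Builder& hiddenSize(int v) { params.hiddenSize = v; return *this; }
    Builder& testcaseName(std::string v) { params.testcaseName = std::move(v); return *this; }
    operator Params() const { return params; }  // implicit conversion, as used in the vector initializers
private:
    Params params;
};

// usage mirroring the test code:
const std::vector<Params> cases{
    Builder{}.batchSize(2).hiddenSize(3).testcaseName("lstm_cell_sketch"),
};
```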

View File

@ -4,8 +4,8 @@
#include <gtest/gtest.h>
#include "openvino/op/topk.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/opsets/opset3.hpp"
#include "openvino/opsets/opset1.hpp"
#include "base_reference_test.hpp"
using namespace reference_tests;
@ -15,7 +15,7 @@ namespace {
struct TopKParams {
TopKParams(
const Tensor& A, const Tensor& k, const int64_t axis,
const op::v1::TopK::Mode mode, const op::v1::TopK::SortType sort,
const opset1::TopK::Mode mode, const opset1::TopK::SortType sort,
const Tensor& result0, const Tensor& result1, const size_t outIdx,
const std::string& testcaseName = "") :
A(A), k(k), axis(axis), mode(mode), sort(sort),
@ -25,8 +25,8 @@ struct TopKParams {
Tensor A;
Tensor k;
int64_t axis;
op::v1::TopK::Mode mode;
op::v1::TopK::SortType sort;
opset1::TopK::Mode mode;
opset1::TopK::SortType sort;
Tensor result0;
Tensor result1;
size_t outIdx;
@ -71,7 +71,6 @@ struct TopKParamsResnet50 {
std::string testcaseName;
};
class ReferenceTopKTestResnet50 : public testing::TestWithParam<TopKParamsResnet50>, public CommonReferenceTest {
public:
void SetUp() override {
@ -101,18 +100,18 @@ public:
private:
static std::shared_ptr<Model> CreateFunction(const TopKParamsResnet50& params) {
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto B = std::make_shared<op::v1::TopK>(A,
op::v0::Constant::create(element::i64, {}, {5}),
const auto B = std::make_shared<opset1::TopK>(A,
opset1::Constant::create(element::i64, {}, {5}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES);
const auto C = std::make_shared<op::v1::TopK>(A,
op::v0::Constant::create(element::i64, {}, {1}),
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES);
const auto C = std::make_shared<opset1::TopK>(A,
opset1::Constant::create(element::i64, {}, {1}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES);
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES);
const auto out5_value = B->output(0);
const auto out5_index = B->output(1);
@ -220,12 +219,12 @@ public:
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = op::v0::Constant::create(params.k.type,
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<op::v1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
return f;
}
@ -253,8 +252,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
}({128, 1000})),
Tensor(ET2, {}, std::vector<T2>{5}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::NONE,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::NONE,
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape, std::vector<size_t> shape) -> std::vector<T>{
std::vector<T> expected_value;
for (size_t i = 0; i < rshape[0]; i++) {
@ -292,8 +291,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
}({128, 1000})),
Tensor(ET2, {}, std::vector<T2>{5}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::NONE,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::NONE,
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape) -> std::vector<T>{
std::vector<T> expected_value;
for (size_t i = 0; i < rshape[0]; i++) {
@ -331,8 +330,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
}({128, 1000})),
Tensor(ET2, {}, std::vector<T2>{5}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape, std::vector<size_t> shape) -> std::vector<T>{
std::vector<T> expected_value;
for (size_t i = 0; i < rshape[0]; i++) {
@ -366,8 +365,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
}({128, 1000})),
Tensor(ET2, {}, std::vector<T2>{5}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape) -> std::vector<T>{
std::vector<T> expected_value;
for (size_t i = 0; i < rshape[0]; i++) {
@ -401,8 +400,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
}({128, 1000})),
Tensor(ET2, {}, std::vector<T2>{5}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_INDICES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_INDICES,
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape, std::vector<size_t> shape) -> std::vector<T>{
std::vector<T> expected_value;
for (size_t i = 0; i < rshape[0]; i++) {
@ -440,8 +439,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
}({128, 1000})),
Tensor(ET2, {}, std::vector<T2>{5}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_INDICES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_INDICES,
Tensor(ET, {128, 5}, [](std::vector<size_t> rshape) -> std::vector<T>{
std::vector<T> expected_value;
for (size_t i = 0; i < rshape[0]; i++) {
@ -467,8 +466,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{5, 4, 3}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{3, 4, 0}),
0,
@ -478,8 +477,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_INDICES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_INDICES,
Tensor(ET, {3}, std::vector<T>{3, 5, 4}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 3, 4}),
0,
@ -489,8 +488,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{1, 2, 0}),
0,
@ -500,8 +499,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_INDICES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_INDICES,
Tensor(ET, {3}, std::vector<T>{3, 1, 2}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 1, 2}),
0,
@ -536,7 +535,7 @@ std::vector<TopKParams> generateCombinedParamsMaxMinSort() {
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestMaxMinSort,
testing::ValuesIn(generateCombinedParamsMaxMinSort()), ReferenceTopKTest::getTestCaseName);
class ReferenceTopKTestV3 : public ReferenceTopKTest {
class ReferenceTopKTestBackend : public ReferenceTopKTest {
public:
void SetUp() override {
auto params = GetParam();
@ -547,18 +546,18 @@ public:
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = op::v0::Constant::create(params.k.type,
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<op::v3::TopK>(A, k, params.axis, params.mode, params.sort);
const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTestV3, CompareWithRefs) {
TEST_P(ReferenceTopKTestBackend, CompareWithRefs) {
Exec();
}
@ -572,8 +571,8 @@ std::vector<TopKParams> generateParamsV3() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{5, 4, 3}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{3, 4, 0}),
0,
@ -583,8 +582,8 @@ std::vector<TopKParams> generateParamsV3() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_INDICES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_INDICES,
Tensor(ET, {3}, std::vector<T>{3, 5, 4}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 3, 4}),
0,
@ -594,8 +593,8 @@ std::vector<TopKParams> generateParamsV3() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{1, 2, 0}),
0,
@ -605,8 +604,8 @@ std::vector<TopKParams> generateParamsV3() {
Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_INDICES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_INDICES,
Tensor(ET, {3}, std::vector<T>{3, 1, 2}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 1, 2}),
0,
@ -615,7 +614,7 @@ std::vector<TopKParams> generateParamsV3() {
return params;
}
std::vector<TopKParams> generateCombinedParamsV3() {
std::vector<TopKParams> generateCombinedParamsBackend() {
const std::vector<std::vector<TopKParams>> generatedParams {
generateParamsMaxMinSort<element::Type_t::i8, element::Type_t::i64, element::Type_t::i32>(),
generateParamsMaxMinSort<element::Type_t::i16, element::Type_t::i64, element::Type_t::i32>(),
@ -638,8 +637,8 @@ std::vector<TopKParams> generateCombinedParamsV3() {
return combinedParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestV3,
testing::ValuesIn(generateCombinedParamsV3()), ReferenceTopKTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestBackend,
testing::ValuesIn(generateCombinedParamsBackend()), ReferenceTopKTest::getTestCaseName);
class ReferenceTopKTest1dMaxMin : public ReferenceTopKTest {
public:
@ -673,12 +672,12 @@ public:
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = op::v0::Constant::create(params.k.type,
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<op::v1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(OutputVector{B->output(out_idx)}, ParameterVector{A});
return f;
}
@ -698,8 +697,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET2, {}, std::vector<T2>{6}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
0,
@ -709,8 +708,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET2, {}, std::vector<T2>{6}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
1,
@ -720,8 +719,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{6, 5, 4}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
0,
@ -731,8 +730,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{6, 5, 4}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
1,
@ -742,8 +741,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {1}, std::vector<T>{6}),
Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
0,
@ -753,8 +752,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {1}, std::vector<T>{6}),
Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
1,
@ -764,8 +763,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET2, {}, std::vector<T2>{6}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
0,
@ -775,8 +774,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET2, {}, std::vector<T2>{6}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
1,
@ -786,8 +785,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
0,
@ -797,8 +796,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET2, {}, std::vector<T2>{3}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
1,
@ -808,8 +807,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {1}, std::vector<T>{1}),
Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
0,
@ -819,8 +818,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {1}, std::vector<T>{1}),
Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
1,
@ -832,8 +831,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{3}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3, 2}, std::vector<T>{
10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
}),
@ -849,8 +848,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{3}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3, 2}, std::vector<T>{
10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
}),
@ -882,8 +881,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 3, 2, 4}, std::vector<T>{
169, 241, 177, 249, 185, 233, 170, 242, 178, 250, 186, 258, 171, 243, 179, 251,
187, 259, 172, 224, 180, 252, 188, 260, 149, 221, 157, 229, 165, 113, 150, 222,
@ -923,8 +922,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 3, 2, 4}, std::vector<T>{
169, 241, 177, 249, 185, 233, 170, 242, 178, 250, 186, 258, 171, 243, 179, 251,
187, 259, 172, 224, 180, 252, 188, 260, 149, 221, 157, 229, 165, 113, 150, 222,
@ -948,8 +947,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 2}, std::vector<T>{
10, 12, 9, 4, 11, 7, 6, 3
}),
@ -965,8 +964,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 2}, std::vector<T>{
10, 12, 9, 4, 11, 7, 6, 3
}),
@ -982,8 +981,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 1, 2}, std::vector<T>{
10, 12, 11, 7
}),
@ -999,8 +998,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 1, 2}, std::vector<T>{
10, 12, 11, 7
}),
@ -1016,8 +1015,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{3}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3, 2}, std::vector<T>{
8, 2, 10, 4, 12, 9, 5, 1, 6, 3, 11, 7
}),
@ -1033,8 +1032,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{3}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3, 2}, std::vector<T>{
8, 2, 10, 4, 12, 9, 5, 1, 6, 3, 11, 7
}),
@ -1050,8 +1049,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 2}, std::vector<T>{
8, 2, 10, 4, 5, 1, 6, 3
}),
@ -1067,8 +1066,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 2}, std::vector<T>{
8, 2, 10, 4, 5, 1, 6, 3
}),
@ -1084,8 +1083,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 1, 2}, std::vector<T>{
8, 2, 5, 1
}),
@ -1101,8 +1100,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 1, 2}, std::vector<T>{
8, 2, 5, 1
}),
@ -1118,8 +1117,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{4}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {4, 3}, std::vector<T>{
12, 11, 10, 9, 8, 7, 6, 2, 5, 3, 1, 4
}),
@ -1135,8 +1134,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{4}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {4, 3}, std::vector<T>{
12, 11, 10, 9, 8, 7, 6, 2, 5, 3, 1, 4
}),
@ -1152,8 +1151,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3}, std::vector<T>{
12, 11, 10, 9, 8, 7
}),
@ -1169,8 +1168,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3}, std::vector<T>{
12, 11, 10, 9, 8, 7
}),
@ -1186,8 +1185,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {1, 3}, std::vector<T>{
12, 11, 10
}),
@ -1203,8 +1202,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {1, 3}, std::vector<T>{
12, 11, 10
}),
@ -1220,8 +1219,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 1}, std::vector<T>{
4, 3
}),
@ -1237,8 +1236,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 1}, std::vector<T>{
4, 3
}),
@ -1254,8 +1253,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{4}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {4, 3}, std::vector<T>{
3, 1, 4, 6, 2, 5, 9, 8, 7, 12, 11, 10
}),
@ -1271,8 +1270,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{4}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {4, 3}, std::vector<T>{
3, 1, 4, 6, 2, 5, 9, 8, 7, 12, 11, 10
}),
@ -1288,8 +1287,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3}, std::vector<T>{
3, 1, 4, 6, 2, 5
}),
@ -1305,8 +1304,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{2}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3}, std::vector<T>{
3, 1, 4, 6, 2, 5
}),
@ -1322,8 +1321,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::NONE,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::NONE,
Tensor(ET, {1, 3}, std::vector<T>{
3, 1, 4
}),
@ -1339,8 +1338,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
}),
Tensor(ET2, {}, std::vector<T2>{1}),
0,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::NONE,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::NONE,
Tensor(ET, {1, 3}, std::vector<T>{
3, 1, 4
}),
@ -1380,12 +1379,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTest1dMaxM
class ReferenceTopKTestInt64 : public ReferenceTopKTest1dMaxMin {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = op::v0::Constant::create(params.k.type,
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<op::v1::TopK>(A,
const auto B = std::make_shared<opset1::TopK>(A,
k,
params.axis,
params.mode,
@ -1412,8 +1411,8 @@ std::vector<TopKParams> generateParamsInt64() {
}),
Tensor(ET2, {}, std::vector<T2>{3}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3, 2}, std::vector<T>{
10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
}),
@ -1428,8 +1427,8 @@ std::vector<TopKParams> generateParamsInt64() {
}),
Tensor(ET2, {}, std::vector<T2>{3}),
1,
op::v1::TopK::Mode::MAX,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 3, 2}, std::vector<T>{
10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
}),
@ -1468,12 +1467,12 @@ public:
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = op::v0::Constant::create(params.k.type,
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<op::v1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(OutputVector{B->output(1)}, ParameterVector{A});
return f;
}
@ -1493,8 +1492,8 @@ std::vector<TopKParams> generateParamsSingleOutput() {
Tensor(ET, {2, 3, 2}, std::vector<T>{12, 2, 10, 9, 8, 4, 6, 1, 5, 3, 11, 7}),
Tensor(ET2, {}, std::vector<T2>{2}),
1,
op::v1::TopK::Mode::MIN,
op::v1::TopK::SortType::SORT_VALUES,
opset1::TopK::Mode::MIN,
opset1::TopK::SortType::SORT_VALUES,
Tensor(ET, {2, 2, 2}, std::vector<T>{}),
Tensor(ET_OUT, {2, 2, 2}, std::vector<T_OUT>{2, 0, 1, 2, 1, 0, 0, 1}),
0,
@ -1530,19 +1529,181 @@ INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestSingle
testing::ValuesIn(generateCombinedParamsSingleOutput()), ReferenceTopKTest::getTestCaseName);
TEST(ReferenceTopKTestInvalid, topk_v1_invalid_strings) {
const auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{1, 2, 3});
const auto k = op::v0::Constant::create(element::i64, Shape{}, {1});
EXPECT_THROW(op::v1::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure);
EXPECT_THROW(op::v1::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure);
const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
const auto k = opset1::Constant::create(element::i64, Shape{}, {1});
EXPECT_THROW(opset1::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure);
EXPECT_THROW(opset1::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure);
}
TEST(ReferenceTopKTestInvalid, topk_v1_invalid_k) {
const auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{1, 2, 3});
const auto k_non_scalar = op::v0::Constant::create(element::i64, Shape{2}, {1, 2});
EXPECT_THROW(op::v1::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto k_float = op::v0::Constant::create(element::f32, Shape{}, {1.0f});
EXPECT_THROW(op::v1::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto k_negative = op::v0::Constant::create(element::i8, Shape{}, {-1});
EXPECT_THROW(op::v1::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
const auto k_non_scalar = opset1::Constant::create(element::i64, Shape{2}, {1, 2});
EXPECT_THROW(opset1::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto k_float = opset1::Constant::create(element::f32, Shape{}, {1.0f});
EXPECT_THROW(opset1::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto k_negative = opset1::Constant::create(element::i8, Shape{}, {-1});
EXPECT_THROW(opset1::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure);
}
class ReferenceTopKTestResnet50V3 : public ReferenceTopKTestResnet50 {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParamsResnet50& params) {
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto B = std::make_shared<opset3::TopK>(A,
opset1::Constant::create(element::i64, {}, {5}),
1,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES);
const auto C = std::make_shared<opset3::TopK>(A,
opset1::Constant::create(element::i64, {}, {1}),
1,
opset1::TopK::Mode::MAX,
opset1::TopK::SortType::SORT_VALUES);
const auto out5_value = B->output(0);
const auto out5_index = B->output(1);
const auto out1_value = C->output(0);
const auto out1_index = C->output(1);
const auto f = std::make_shared<Model>(OutputVector{out5_value, out5_index, out1_value, out1_index}, ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTestResnet50V3, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestResnet50V3,
testing::ValuesIn(generateCombinedParamsResnet50()), ReferenceTopKTestResnet50V3::getTestCaseName);
class ReferenceTopKTestMaxMinSortV3 : public ReferenceTopKTestMaxMinSort {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTestMaxMinSortV3, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestMaxMinSortV3,
testing::ValuesIn(generateCombinedParamsMaxMinSort()), ReferenceTopKTestMaxMinSortV3::getTestCaseName);
class ReferenceTopKTestBackendV3 : public ReferenceTopKTestBackend {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTestBackendV3, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestBackendV3,
testing::ValuesIn(generateCombinedParamsBackend()), ReferenceTopKTestBackendV3::getTestCaseName);
class ReferenceTopKTest1dMaxMinV3 : public ReferenceTopKTest1dMaxMin {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(OutputVector{B->output(out_idx)}, ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTest1dMaxMinV3, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTest1dMaxMinV3,
testing::ValuesIn(generateCombinedParams1dMaxMin()), ReferenceTopKTest1dMaxMinV3::getTestCaseName);
class ReferenceTopKTestInt64V3 : public ReferenceTopKTestInt64 {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<opset3::TopK>(A,
k,
params.axis,
params.mode,
params.sort,
element::i64);
const auto f = std::make_shared<Model>(OutputVector{B->output(out_idx)}, ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTestInt64V3, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestInt64V3,
testing::ValuesIn(generateCombinedParamsInt64()), ReferenceTopKTestInt64V3::getTestCaseName);
class ReferenceTopKTestSingleOutputV3 : public ReferenceTopKTestSingleOutput {
private:
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
params.A.shape);
const auto k = opset1::Constant::create(params.k.type,
params.k.shape,
params.k.data.data());
const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
const auto f = std::make_shared<Model>(OutputVector{B->output(1)}, ParameterVector{A});
return f;
}
};
TEST_P(ReferenceTopKTestSingleOutputV3, CompareWithRefs) {
Exec();
}
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestSingleOutputV3,
testing::ValuesIn(generateCombinedParamsSingleOutput()), ReferenceTopKTestSingleOutputV3::getTestCaseName);
TEST(ReferenceTopKTestInvalidV3, topk_v3_invalid_strings) {
const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
const auto k = opset1::Constant::create(element::i64, Shape{}, {1});
EXPECT_THROW(opset3::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure);
EXPECT_THROW(opset3::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure);
}
TEST(ReferenceTopKTestInvalidV3, topk_v3_invalid_k) {
const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
const auto k_non_scalar = opset1::Constant::create(element::i64, Shape{2}, {1, 2});
EXPECT_THROW(opset3::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto k_float = opset1::Constant::create(element::f32, Shape{}, {1.0f});
EXPECT_THROW(opset3::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure);
const auto k_negative = opset1::Constant::create(element::i8, Shape{}, {-1});
EXPECT_THROW(opset3::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure);
}
} // namespace
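
The edits above only swap `op::vN::` spellings for their opset aliases. Below is a minimal sketch (assuming an OpenVINO development environment where these headers are available) of building the same kind of TopK graph through the `opset1` alias, which resolves to the v1 operator:

```cpp
// Minimal sketch, assuming OpenVINO headers are available; the opset namespaces
// are collections of aliases for the versioned operators, so the rename above
// does not change which TopK implementation the tests construct.
#include <memory>
#include "openvino/core/model.hpp"
#include "openvino/opsets/opset1.hpp"

std::shared_ptr<ov::Model> make_topk_sketch() {
    const auto data = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 5});
    const auto k = ov::opset1::Constant::create(ov::element::i64, ov::Shape{}, {3});
    const auto topk = std::make_shared<ov::opset1::TopK>(data,
                                                         k,
                                                         1,  // axis
                                                         ov::opset1::TopK::Mode::MAX,
                                                         ov::opset1::TopK::SortType::SORT_VALUES);
    return std::make_shared<ov::Model>(topk->outputs(), ov::ParameterVector{data});
}
```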

View File

@ -7,8 +7,3 @@ project(InferenceEngine)
if(ENABLE_PYTHON)
add_subdirectory(ie_bridges/python)
endif()
if(ENABLE_TESTS)
add_subdirectory(tests_deprecated)
add_subdirectory(tests)
endif()

View File

@ -1,15 +0,0 @@
WHEEL_PACKAGE_NAME=@WHEEL_PACKAGE_NAME@
WHEEL_VERSION=@WHEEL_VERSION@
WHEEL_BUILD=@WHEEL_BUILD@
WHEEL_LICENCE_TYPE=@WHEEL_LICENCE_TYPE@
WHEEL_AUTHOR=@WHEEL_AUTHOR@
WHEEL_AUTHOR_EMAIL=@WHEEL_AUTHOR_EMAIL@
WHEEL_DESC=@WHEEL_DESC@
WHEEL_LICENSE=@WHEEL_LICENSE@
WHEEL_REQUIREMENTS=@WHEEL_REQUIREMENTS@
WHEEL_OVERVIEW=@WHEEL_OVERVIEW@
CMAKE_BUILD_DIR=@CMAKE_BINARY_DIR@
OV_RUNTIME_LIBS_DIR=@IE_CPACK_RUNTIME_PATH@
TBB_LIBS_DIR=@TBB_LIBS_DIR@
PY_PACKAGES_DIR=@PY_PACKAGES_DIR@

View File

@ -1,40 +1,14 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
set(WHEEL_PACKAGE_NAME "openvino" CACHE STRING "Name of the package")
set(WHEEL_LICENCE_TYPE "OSI Approved :: Apache Software License" CACHE STRING "License type for the package")
set(WHEEL_AUTHOR "Intel Corporation" CACHE STRING "Package authors name")
set(WHEEL_AUTHOR_EMAIL "openvino_pushbot@intel.com" CACHE STRING "Email address of the package author")
set(WHEEL_DESC "Inference Engine Python* API" CACHE STRING "Short, summary description of the package")
set(WHEEL_URL "https://docs.openvinotoolkit.org/latest/index.html" CACHE STRING "Home page url")
set(WHEEL_DOWNLOAD_URL "https://github.com/openvinotoolkit/openvino/tags" CACHE STRING "Download page url")
set(WHEEL_VERSION "${IE_VERSION}" CACHE STRING "Version of this release" FORCE)
set(WHEEL_BUILD "${IE_VERSION_BUILD}" CACHE STRING "Build number of this release" FORCE)
set(WHEEL_LICENSE "${CMAKE_SOURCE_DIR}/LICENSE" CACHE STRING "Wheel license file")
set(WHEEL_REQUIREMENTS "${CMAKE_CURRENT_SOURCE_DIR}/meta/openvino.requirements.txt" CACHE STRING "Wheel requirements.txt file")
set(WHEEL_OVERVIEW "${CMAKE_CURRENT_SOURCE_DIR}/meta/pypi_overview.md" CACHE STRING "Detailed description")
set(SETUP_PY "${CMAKE_CURRENT_SOURCE_DIR}/setup.py")
set(SETUP_ENV "${CMAKE_CURRENT_SOURCE_DIR}/.env.in")
set(SETUP_ENV_OUT "${CMAKE_CURRENT_SOURCE_DIR}/.env")
set(PY_PACKAGES_DIR ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION})
set(TBB_LIBS_DIR runtime/3rdparty/tbb/lib)
if(APPLE)
set(WHEEL_PLATFORM macosx_10_15_x86_64)
elseif(UNIX)
set(WHEEL_PLATFORM manylinux2014_x86_64)
elseif(WIN32)
set(WHEEL_PLATFORM win_amd64)
if(WIN32)
set(TBB_LIBS_DIR runtime/3rdparty/tbb/bin)
else()
message(FATAL_ERROR "This platform is not supported")
endif()
configure_file(${SETUP_ENV} ${SETUP_ENV_OUT} @ONLY)
if(LINUX)
find_host_program(patchelf_program
NAMES patchelf
@ -55,21 +29,30 @@ endforeach()
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.bdist_wheel ; print(f'{wheel.bdist_wheel.get_abi_tag()}')" OUTPUT_VARIABLE PYTHON_ABI)
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{tags.interpreter_name()}{tags.interpreter_version()}')" OUTPUT_VARIABLE INTERPRETER)
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{next(tags._platform_tags())}')" OUTPUT_VARIABLE WHEEL_PLATFORM)
string(STRIP ${PYTHON_ABI} PYTHON_ABI)
string(STRIP ${INTERPRETER} INTERPRETER)
string(STRIP ${WHEEL_PLATFORM} WHEEL_PLATFORM)
set(openvino_wheel_name "openvino-${WHEEL_VERSION}-${WHEEL_BUILD}-${INTERPRETER}-${PYTHON_ABI}-${WHEEL_PLATFORM}.whl")
set(openvino_wheels_output_dir "${CMAKE_BINARY_DIR}/wheels")
set(openvino_wheel_path "${openvino_wheels_output_dir}/${openvino_wheel_name}")
add_custom_command(OUTPUT ${openvino_wheel_path}
COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}"
COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_SOURCE_DIR}/licensing" "${CMAKE_BINARY_DIR}/licensing"
COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_BINARY_DIR}/site-packages"
COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} clean bdist_wheel
COMMAND ${CMAKE_COMMAND} -E env WHEEL_VERSION=${WHEEL_VERSION}
WHEEL_BUILD=${WHEEL_BUILD}
CMAKE_BUILD_DIR=${CMAKE_BINARY_DIR}
OV_RUNTIME_LIBS_DIR=${IE_CPACK_RUNTIME_PATH}
TBB_LIBS_DIR=${TBB_LIBS_DIR}
PY_PACKAGES_DIR=${PY_PACKAGES_DIR}
${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/setup.py" clean bdist_wheel
--dist-dir ${openvino_wheels_output_dir}
--build=${WHEEL_BUILD}
--plat-name=${WHEEL_PLATFORM}
# COMMAND ${CMAKE_COMMAND} -E remove ${SETUP_ENV_OUT}
DEPENDS ${openvino_wheel_deps} ${SETUP_ENV_OUT}
DEPENDS ${openvino_wheel_deps}
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
COMMENT "Building Python wheel ${openvino_wheel_name}"
VERBATIM)

View File

@ -1,28 +0,0 @@
defusedxml>=0.7.1
scipy~=1.5.4
jstyleson~=0.0.2
numpy>=1.16.6,<1.20
addict>=2.4.0
pandas~=1.1.5
hyperopt~=0.1.2
networkx~=2.5
tqdm>=4.54.1
texttable~=1.6.3
py-cpuinfo>=7.0.0
PyYAML>=5.4.1
pillow>=8.1.2
scikit-image>=0.17.2
scikit-learn>=0.24.1
yamlloader>=0.5
shapely>=1.7.1
nibabel>=3.2.1
pydicom>=2.1.2
sentencepiece>=0.1.95
tokenizers>=0.10.1
editdistance>=0.5.3
parasail>=1.2.4
fast-ctc-decode>=0.2.5
rawpy>=0.16.0
nltk>=3.5
opencv-python==4.5.*
progress>=1.5

View File

@ -1,22 +0,0 @@
[options]
py_modules =
mo
mo_tf
mo_caffe
mo_mxnet
mo_onnx
mo_kaldi
[options.package_data]
* = *
[options.entry_points]
console_scripts =
[metadata]
license_files =
readme*
*LICENSE*
*license*
*third-party-programs*
*EULA*

View File

@ -1 +0,0 @@
numpy>=1.16.6,<1.20

View File

@ -1,32 +0,0 @@
## OpenVINO™ Toolkit
OpenVINO™ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNNs), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The OpenVINO™ toolkit includes the Deep Learning Deployment Toolkit (DLDT).
OpenVINO™ toolkit:
- Enables CNN-based deep learning inference on the edge
- Supports heterogeneous execution across an Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs
- Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels
- Includes optimized calls for computer vision standards, including OpenCV\* and OpenCL™
Operating Systems:
- Ubuntu* 18.04 long-term support (LTS), 64-bit
- Windows* 10, 64-bit
- macOS* 10.15, 64-bit
## Install the Runtime Package Using the PyPI Repository
1. Set up and update pip to the highest version:
```sh
python3 -m pip install --upgrade pip
```
2. Install the Intel® distribution of OpenVINO™ toolkit:
```sh
pip install openvino
```
3. Verify that the package is installed:
```sh
python3 -c "from openvino.inference_engine import IECore"
```
Now you are ready to develop and run your application.

View File

@ -1,3 +1,3 @@
setuptools>=53.0.0
wheel>=0.36.2
python-decouple>=3.4

View File

@ -1,7 +1,11 @@
[metadata]
license_files =
readme*
*LICENSE*
*license*
*third-party-programs*
*EULA*
readme*
*LICENSE*
*license*
*third-party-programs*
../../../../licensing/runtime-third-party-programs.txt
../../../../licensing/tbb_third-party-programs.txt
../../../../licensing/onednn_third-party-programs.txt
../../../../LICENSE

View File

@ -21,7 +21,6 @@ from setuptools import setup, find_namespace_packages, Extension
from setuptools.command.build_ext import build_ext
from setuptools.command.build_clib import build_clib
from setuptools.command.install import install
from decouple import config
WHEEL_LIBS_INSTALL_DIR = os.path.join('openvino', 'libs')
WHEEL_LIBS_PACKAGE = 'openvino.libs'
@ -41,10 +40,11 @@ elif machine == 'aarch64':
ARCH = 'arm64'
# The following variables can be defined in environment or .env file
CMAKE_BUILD_DIR = config('CMAKE_BUILD_DIR', '.')
OV_RUNTIME_LIBS_DIR = config('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}')
TBB_LIBS_DIR = config('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}')
PY_PACKAGES_DIR = config('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}')
SCRIPT_DIR = Path(__file__).resolve().parents[0]
CMAKE_BUILD_DIR = os.getenv('CMAKE_BUILD_DIR', '.')
OV_RUNTIME_LIBS_DIR = os.getenv('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}')
TBB_LIBS_DIR = os.getenv('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}')
PY_PACKAGES_DIR = os.getenv('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}')
LIBS_RPATH = '$ORIGIN' if sys.platform == 'linux' else '@loader_path'
LIB_INSTALL_CFG = {
@ -428,28 +428,28 @@ if not any(pl in sys.platform for pl in platforms):
sys.exit(f'Unsupported platform: {sys.platform}, expected: linux, win32, darwin')
# copy license file into the build directory
package_license = config('WHEEL_LICENSE', '')
package_license = os.getenv('WHEEL_LICENSE', SCRIPT_DIR.parents[3] / 'LICENSE')
if os.path.exists(package_license):
copyfile(package_license, 'LICENSE')
packages = find_namespace_packages(get_package_dir(PY_INSTALL_CFG))
package_data: typing.Dict[str, list] = {}
pkg_name = config('WHEEL_PACKAGE_NAME', 'openvino')
pkg_name = os.getenv('WHEEL_PACKAGE_NAME', 'openvino')
ext_modules = find_prebuilt_extensions(get_dir_list(PY_INSTALL_CFG)) if pkg_name == 'openvino' else []
setup(
version=config('WHEEL_VERSION', '0.0.0'),
build=config('WHEEL_BUILD', '000'),
author_email=config('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
version=os.getenv('WHEEL_VERSION', '0.0.0'),
build=os.getenv('WHEEL_BUILD', '000'),
author_email=os.getenv('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
name=pkg_name,
license=config('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
author=config('WHEEL_AUTHOR', 'Intel Corporation'),
description=config('WHEEL_DESC', 'Inference Engine Python* API'),
install_requires=get_dependencies(config('WHEEL_REQUIREMENTS', 'meta/openvino.requirements.txt')),
long_description=get_description(config('WHEEL_OVERVIEW', 'meta/pypi_overview.md')),
license=os.getenv('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
author=os.getenv('WHEEL_AUTHOR', 'Intel(R) Corporation'),
description=os.getenv('WHEEL_DESC', 'OpenVINO(TM) Runtime'),
install_requires=get_dependencies(os.getenv('WHEEL_REQUIREMENTS', SCRIPT_DIR.parents[0] / 'requirements.txt')),
long_description=get_description(os.getenv('WHEEL_OVERVIEW', SCRIPT_DIR.parents[3] / 'docs/install_guides/pypi-openvino-rt.md')),
long_description_content_type='text/markdown',
download_url=config('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
url=config('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
download_url=os.getenv('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
url=os.getenv('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
cmdclass={
'build': CustomBuild,
'install': CustomInstall,

View File

@ -25,6 +25,8 @@ public:
type = shape.is_static() ? ShapeType::Static : ShapeType::Dynamic;
initDims();
hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
}
explicit Shape(const InferenceEngine::SizeVector& shape) {
@ -33,6 +35,8 @@ public:
type = ShapeType::Static;
initDims();
hasZeroDimensions = std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
}
/**
@ -106,6 +110,10 @@ public:
return type == ShapeType::Dynamic;
}
bool hasZeroDims() const {
return hasZeroDimensions;
}
size_t getRank() const {
return minDims.size();
}
@ -169,6 +177,8 @@ private:
Dynamic
} type {ShapeType::Static};
bool hasZeroDimensions = false;
VectorDims minDims;
VectorDims maxDims;
VectorDims dims;
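
For illustration, a minimal standalone sketch of the empty-tensor detection idea behind the new hasZeroDims()/hasZeroDimensions flag (Dims and the function below are simplified stand-ins, not the plugin's actual types):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    using Dims = std::vector<size_t>;   // illustrative stand-in for VectorDims

    // A tensor is treated as empty when any of its dimensions equals 0.
    bool hasZeroDims(const Dims& dims) {
        return std::any_of(dims.begin(), dims.end(), [](size_t d) { return d == 0; });
    }

    int main() {
        std::cout << hasZeroDims({1, 0, 2, 3}) << '\n'; // 1: empty tensor
        std::cout << hasZeroDims({1, 2, 3}) << '\n';    // 0: regular tensor
    }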

View File

@ -16,7 +16,8 @@ CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const
offsetPadding = 0;
offsetPaddingToData.resize(dims.size(), 0);
strides.resize(order.size());
strides[strides.size() - 1] = 1;
// for empty tensor case we fill all strides with 0 values
strides[strides.size() - 1] = shape.hasZeroDims() ? 0 : 1;
for (size_t i = 2; i <= order.size(); i++) {
strides[strides.size() - i] = strides[strides.size() - (i - 1)] * blockedDims[blockedDims.size() - (i - 1)];
}
@ -33,6 +34,15 @@ CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const
IE_THROW() << "CpuBlockedMemoryDesc doesn't support undefined blockedDims.";
}
if (shape.hasZeroDims()) {
const auto& dims = shape.getDims();
for (size_t i = 0; i < shape.getRank(); i++) {
if (dims[order[i]] == 0 && !dimsEqualWeak(blockedDims[i], 0)) {
IE_THROW() << "Can't create CpuBlockedMemoryDesc. Mistmatch zero dims in dims and blocked dims";
}
}
}
this->order = order;
this->blockedDims = blockedDims;
this->offsetPadding = offsetPadding;
@ -44,7 +54,9 @@ CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const
}
if (strides.empty() && !order.empty()) {
if (std::any_of(this->blockedDims.begin(), this->blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
if (shape.hasZeroDims()) {
this->strides.resize(order.size(), 0);
} else if (std::any_of(this->blockedDims.begin(), this->blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
this->strides.resize(order.size(), Shape::UNDEFINED_DIM);
} else {
this->strides.resize(order.size());
@ -54,6 +66,9 @@ CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const
}
}
} else {
if (shape.hasZeroDims() && std::any_of(strides.begin(), strides.end(), [](size_t stride) { return stride != 0; } )) {
IE_THROW() << "Can't create CpuBlockedMemoryDesc with zero dim, but with non zero strides";
}
this->strides = strides;
}
@ -92,11 +107,16 @@ bool CpuBlockedMemoryDesc::isCompatible(const DnnlBlockedMemoryDesc &rhs) const
return rhs.isCompatible(*this);
}
bool CpuBlockedMemoryDesc::canComputeMemSizeZeroDims() const {
return getShape().hasZeroDims() && getOffsetPadding() != Shape::UNDEFINED_DIM;
}
size_t CpuBlockedMemoryDesc::getCurrentMemSizeImp() const {
int64_t e_size = getOffsetPadding() + 1; // size in bytes (from begin of data to last element)
for (int j = 0; j < getBlockDims().size(); j++)
e_size += (getBlockDims()[j] - 1) * getStrides()[j];
if (!getShape().hasZeroDims()) {
for (int j = 0; j < getBlockDims().size(); j++)
e_size += (getBlockDims()[j] - 1) * getStrides()[j];
}
e_size *= getPrecision() == InferenceEngine::Precision::BIN ? 1 : getPrecision().size();
@ -104,14 +124,14 @@ size_t CpuBlockedMemoryDesc::getCurrentMemSizeImp() const {
}
size_t CpuBlockedMemoryDesc::getMaxMemSize() const {
if (shape.isStatic()) {
if (shape.isStatic() || shape.hasZeroDims()) {
return getCurrentMemSize();
}
auto& maxDims = shape.getMaxDims();
const auto& maxDims = shape.getMaxDims();
if (std::any_of(maxDims.begin(), maxDims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x ||
// WA: for some nodes ngraph computes the upper bound depending on the precision max value
std::numeric_limits<int32_t>::max() == x; })) {
x >= std::numeric_limits<int32_t>::max(); })) {
return UNDEFINED_SIZE;
}
@ -270,15 +290,23 @@ bool CpuBlockedMemoryDesc::blocksExtended() const {
}
size_t CpuBlockedMemoryDesc::getPaddedElementsCount() const {
if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; }))
if (getShape().hasZeroDims()) {
return 0;
}
if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; })) {
IE_THROW() << "Can't compute padded elements count for non undefined blocked dims";
}
return std::accumulate(blockedDims.begin(), blockedDims.end(), size_t{1}, std::multiplies<size_t>());
}
MemoryDescPtr CpuBlockedMemoryDesc::cloneWithUndefStridesAndOffset() const {
const auto orderSize = getOrder().size();
return std::make_shared<CpuBlockedMemoryDesc>(getPrecision(), getShape(), getBlockDims(), getOrder(), Shape::UNDEFINED_DIM,
VectorDims(orderSize, 0), VectorDims(orderSize, Shape::UNDEFINED_DIM));
CpuBlockedMemoryDescPtr newDesc = std::make_shared<CpuBlockedMemoryDesc>(*this);
newDesc->strides = VectorDims(orderSize, Shape::UNDEFINED_DIM);
newDesc->offsetPadding = Shape::UNDEFINED_DIM;
newDesc->offsetPaddingToData = VectorDims(orderSize, 0);
newDesc->status = descStatus::Undefined;
return newDesc;
}
MemoryDescPtr CpuBlockedMemoryDesc::cloneWithDefaultStridesAndOffset() const {
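
A compact sketch of the dense-stride computation with the zero-stride fallback for empty tensors that this hunk introduces (simplified: undefined dims and custom orders are ignored here):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    using Dims = std::vector<size_t>;

    // Dense row-major strides; for an empty tensor every stride is set to 0.
    Dims denseStrides(const Dims& blockedDims) {
        Dims strides(blockedDims.size(), 0);
        if (blockedDims.empty())
            return strides;
        const bool empty = std::any_of(blockedDims.begin(), blockedDims.end(),
                                       [](size_t d) { return d == 0; });
        strides.back() = empty ? 0 : 1;
        for (size_t i = 2; i <= blockedDims.size(); ++i)
            strides[blockedDims.size() - i] =
                strides[blockedDims.size() - i + 1] * blockedDims[blockedDims.size() - i + 1];
        return strides;
    }
    // denseStrides({2, 3, 4})    -> {12, 4, 1}
    // denseStrides({1, 0, 2, 3}) -> {0, 0, 0, 0}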

View File

@ -84,6 +84,7 @@ public:
private:
size_t getElementOffset(size_t elemNumber) const override;
bool canComputeMemSizeZeroDims() const override;
size_t getCurrentMemSizeImp() const override;
size_t getOffset(const InferenceEngine::SizeVector& v) const;
bool isPlainFormat() const;

View File

@ -93,7 +93,7 @@ public:
*/
size_t getCurrentMemSize() const {
size_t retVal = UNDEFINED_SIZE;
if (isDefined()) {
if (canComputeMemSize()) {
retVal = getCurrentMemSizeImp();
}
return retVal;
@ -140,8 +140,13 @@ protected:
// Get offset to the n'th element. Returns physical index of the element by the logical one considering padding, layout, blocking etc.
virtual size_t getElementOffset(size_t elemNumber) const = 0;
virtual bool canComputeMemSizeZeroDims() const = 0;
virtual bool isDefinedImp() const = 0;
bool canComputeMemSize() const {
return isDefined() || canComputeMemSizeZeroDims();
}
virtual MemoryDescPtr cloneWithNewDimsImp(const VectorDims& dims) const = 0;
MemoryDescType type;
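
The gate added above can be summarized with a small sketch (the flags and sizes below are illustrative; the real descriptor derives them from its dims, strides and offsets, and an empty tensor still accounts for the padded offset rather than reporting a plain 0):

    #include <cstddef>
    #include <limits>

    struct DescState {
        bool defined;       // isDefined(): every dim/stride/offset is known
        bool zeroDims;      // shape contains a 0 dimension
        bool offsetDefined; // padded offset is known
    };

    const size_t UNDEFINED_SIZE = std::numeric_limits<size_t>::max();

    // Mirrors canComputeMemSize(): report a size for fully defined descriptors
    // and for empty tensors with a known offset, otherwise UNDEFINED_SIZE.
    size_t currentMemSize(const DescState& s, size_t sizeWhenDefined) {
        const bool canComputeZeroDims = s.zeroDims && s.offsetDefined;
        if (!(s.defined || canComputeZeroDims))
            return UNDEFINED_SIZE;
        return s.zeroDims ? 0 : sizeWhenDefined;
    }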

View File

@ -48,17 +48,37 @@ DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const Memo
CpuBlockedMemoryDesc MemoryDescUtils::convertToCpuBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc) {
if (desc.getLayout() == InferenceEngine::Layout::ANY)
IE_THROW() << "Cannot convert InferenceEngine::TensorDesc with ANY layout to CpuBlockedMemoryDesc";
const auto &blkDesc = desc.getBlockingDesc();
return CpuBlockedMemoryDesc(desc.getPrecision(), Shape(desc.getDims()), blkDesc.getBlockDims(), blkDesc.getOrder(), blkDesc.getOffsetPadding(),
blkDesc.getOffsetPaddingToData(), blkDesc.getStrides());
const auto& blkDesc = desc.getBlockingDesc();
const auto& dims = desc.getDims();
auto strides = blkDesc.getStrides();
// for the empty tensor case InferenceEngine::TensorDesc fills strides with non-zero values before the first 0 dim
// i.e. dims[1, 0, 2, 3] -> strides [0, 6, 3, 1]
if (std::any_of(dims.begin(), dims.end(), [](size_t dim){ return dim == 0; })) {
std::fill(strides.begin(), strides.end(), 0);
}
return CpuBlockedMemoryDesc(desc.getPrecision(), Shape(dims), blkDesc.getBlockDims(), blkDesc.getOrder(), blkDesc.getOffsetPadding(),
blkDesc.getOffsetPaddingToData(), strides);
}
DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc) {
const auto &blkDesc = desc.getBlockingDesc();
if (desc.getLayout() == InferenceEngine::Layout::ANY)
IE_THROW() << "Cannot convert InferenceEngine::TensorDesc with ANY layout to DnnlBlockedMemoryDesc";
const auto& blkDesc = desc.getBlockingDesc();
const auto& dims = desc.getDims();
auto strides = blkDesc.getStrides();
// for the empty tensor case InferenceEngine::TensorDesc fills strides with non-zero values before the first 0 dim
// i.e. dims[1, 0, 2, 3] -> strides [0, 6, 3, 1]
if (std::any_of(dims.begin(), dims.end(), [](size_t dim){ return dim == 0; })) {
std::fill(strides.begin(), strides.end(), 0);
}
return DnnlBlockedMemoryDesc(desc.getPrecision(), Shape(desc.getDims()), blkDesc.getBlockDims(), blkDesc.getOrder(), blkDesc.getOffsetPadding(),
blkDesc.getOffsetPaddingToData(), blkDesc.getStrides());
blkDesc.getOffsetPaddingToData(), strides);
}
BlockedMemoryDescPtr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDescPtr &desc) {
@ -80,9 +100,16 @@ InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &
InferenceEngine::TensorDesc MemoryDescUtils::convertToTensorDesc(const MemoryDesc& desc) {
if (auto blockingDesc = dynamic_cast<const BlockedMemoryDesc*>(&desc)) {
return InferenceEngine::TensorDesc(blockingDesc->getPrecision(), blockingDesc->getShape().getStaticDims(),
{blockingDesc->getBlockDims(), blockingDesc->getOrder(), blockingDesc->getOffsetPadding(),
blockingDesc->getOffsetPaddingToData(), blockingDesc->getStrides()});
InferenceEngine::BlockingDesc blkDesc = desc.getShape().hasZeroDims() ? InferenceEngine::BlockingDesc(blockingDesc->getBlockDims(),
blockingDesc->getOrder(),
blockingDesc->getOffsetPadding(),
blockingDesc->getOffsetPaddingToData()) :
InferenceEngine::BlockingDesc(blockingDesc->getBlockDims(),
blockingDesc->getOrder(),
blockingDesc->getOffsetPadding(),
blockingDesc->getOffsetPaddingToData(),
blockingDesc->getStrides());
return InferenceEngine::TensorDesc(blockingDesc->getPrecision(), blockingDesc->getShape().getStaticDims(), blkDesc);
} else {
IE_THROW() << "Cannot convert MemoryDesc to InferenceEngine::TensorDesc";
}
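
The stride normalization performed in both conversion helpers above boils down to the following sketch (names are illustrative):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    using Dims = std::vector<size_t>;

    // InferenceEngine::TensorDesc keeps non-zero strides in front of the first
    // zero dimension, e.g. dims {1, 0, 2, 3} -> strides {0, 6, 3, 1}; the CPU
    // descriptors expect all-zero strides for such empty tensors instead.
    Dims zeroStridesIfEmpty(const Dims& dims, Dims strides) {
        if (std::any_of(dims.begin(), dims.end(), [](size_t d) { return d == 0; }))
            std::fill(strides.begin(), strides.end(), 0);
        return strides;
    }
    // zeroStridesIfEmpty({1, 0, 2, 3}, {0, 6, 3, 1}) -> {0, 0, 0, 0}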

View File

@ -15,12 +15,17 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
const auto &dims = shape.getDims();
if (!strides.empty()) { // custom strides
if (shape.hasZeroDims() && std::any_of(strides.begin(), strides.end(), [](size_t stride) { return stride != 0; } )) {
IE_THROW() << "Can't create DnnlBlockedMemoryDesc with zero dim, but with non zero strides";
}
desc = {MKLDNNExtensionUtils::convertToDnnlDims(dims),
MKLDNNExtensionUtils::IEPrecisionToDataType(prc),
MKLDNNExtensionUtils::convertToDnnlDims(strides)};
} else {
mkldnn::memory::dims plain_strides;
if (std::any_of(dims.begin(), dims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
if (shape.hasZeroDims()) {
plain_strides.resize(ndims, 0);
} else if (std::any_of(dims.begin(), dims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
plain_strides.resize(ndims, DNNL_RUNTIME_DIM_VAL);
} else {
plain_strides.resize(ndims, 1);
@ -58,8 +63,8 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
* Limitation of conversion first N elements of order should be permutation of [0,1,2 ... N]
*/
DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const VectorDims& blockedDims,
const VectorDims& order, size_t offsetPadding, const VectorDims& offsetPaddingToData,
const VectorDims& strides) : MemoryDesc(shape, DnnlBlocked) {
const VectorDims& order, size_t offsetPadding, const VectorDims& offsetPaddingToData,
const VectorDims& strides) : MemoryDesc(shape, DnnlBlocked) {
using namespace mkldnn;
// scalar case
if (shape.getRank() == 0) {
@ -90,8 +95,8 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined order.";
}
if (std::any_of(blockedDims.begin() + shape.getRank(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined blockedDims.";
if (std::any_of(blockedDims.begin() + shape.getRank(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM || val == 0; })) {
IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined or zero blockedDims.";
}
auto dims = MKLDNNExtensionUtils::convertToDnnlDims(shape.getDims());
@ -106,7 +111,12 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
size_t inner_ndims = order.size() - dims.size();
const bool emptyDesc = shape.hasZeroDims();
if (!strides.empty()) {
if (emptyDesc && std::any_of(strides.begin(), strides.end(), [](size_t dim) { return dim != 0; } )) {
IE_THROW() << "Can't create DnnlBlockedMemoryDesc with zero dim, but with non zero strides";
}
bool is_descending_strides = true;
for (int i = 1; i < strides.size(); i++) {
is_descending_strides &= (strides[i - 1] >= strides[i]);
@ -118,7 +128,7 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con
IE_THROW() << "Can not construct DnnlBlockedMemoryDesc from strides: " << vec2str(strides);
}
if (!strides.empty() && std::none_of(strides.begin(), strides.end(), [](size_t x) { return Shape::UNDEFINED_DIM == x; })) {
if (!strides.empty() && !emptyDesc && std::none_of(strides.begin(), strides.end(), [](size_t x) { return Shape::UNDEFINED_DIM == x; })) {
bool inner_block_are_dense = one_of(strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted
for (int i = outer_ndims; i < strides.size() - 1; i++) {
inner_block_are_dense &= (strides[i] == strides[i + 1] * blockedDims[i + 1]);
@ -203,6 +213,11 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const Shape& shape, mkldnn::memory:
order.swap(perm);
order.insert(order.end(), inner_idxs.begin(), inner_idxs.end());
if (shape.hasZeroDims()) {
auto& blk = desc.data.format_desc.blocking;
std::fill(std::begin(blk.strides), std::begin(blk.strides) + desc.data.ndims, 0);
}
initBlockedParams();
}
@ -296,6 +311,12 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc)
IE_THROW(Unexpected) << "Can't create DnnlBlockedMemoryDesc from not blocking desc";
order = extractOrder(desc);
if (getShape().hasZeroDims()) {
auto& blk = desc.data.format_desc.blocking;
std::fill(std::begin(blk.strides), std::begin(blk.strides) + desc.data.ndims, 0);
}
initBlockedParams();
}
@ -368,6 +389,7 @@ bool DnnlBlockedMemoryDesc::isTailCFormat() const {
static mkldnn::memory::desc cloneDescWithNewDims(const mkldnn::memory::desc& desc, const VectorDims& dims, const VectorDims& order) {
using namespace dnnl::impl::utils;
auto mklDims = MKLDNNExtensionUtils::convertToDnnlDims(dims);
const auto offsetPadding = desc.data.offset0;
mkldnn::memory::desc newMklDesc = desc;
array_copy(newMklDesc.data.dims, mklDims.data(), mklDims.size());
std::vector<int> perm(order.begin(), order.begin() + mklDims.size());
@ -379,6 +401,9 @@ static mkldnn::memory::desc cloneDescWithNewDims(const mkldnn::memory::desc& des
if (retCode != dnnl::impl::status::success) {
IE_THROW() << "Can not clone DnnlBlockedMemoryDesc with dims: " << MemoryDescUtils::dims2str(dims);
}
// dnnl::impl::fill_blocked always sets offset0 to 0,
// so we need to restore the actual value
newMklDesc.data.offset0 = offsetPadding;
return newMklDesc;
}
@ -476,14 +501,14 @@ bool DnnlBlockedMemoryDesc::isSame(mkldnn::memory::format_tag fmt) const {
}
size_t DnnlBlockedMemoryDesc::getMaxMemSize() const {
if (shape.isStatic()) {
if (shape.isStatic() || shape.hasZeroDims()) {
return getCurrentMemSize();
}
auto& maxDims = shape.getMaxDims();
const auto& maxDims = shape.getMaxDims();
if (std::any_of(maxDims.begin(), maxDims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x ||
// WA: for some nodes ngraph computes the upper bound depending on the precision max value
std::numeric_limits<int32_t>::max() == x; })) {
x >= std::numeric_limits<int32_t>::max(); })) {
return UNDEFINED_SIZE;
}
@ -492,6 +517,13 @@ size_t DnnlBlockedMemoryDesc::getMaxMemSize() const {
}
size_t DnnlBlockedMemoryDesc::getPaddedElementsCount() const {
if (getShape().hasZeroDims()) {
return 0;
}
if (std::any_of(std::begin(desc.data.padded_dims), std::begin(desc.data.padded_dims) + desc.data.ndims,
[](dnnl_dim_t dim) { return dim == DNNL_RUNTIME_DIM_VAL; })) {
IE_THROW() << "Can't compute padded elements count for non undefined blocked dims";
}
return std::accumulate(std::begin(desc.data.padded_dims), std::begin(desc.data.padded_dims) + desc.data.ndims, size_t{1},
std::multiplies<int64_t>());
}
@ -548,7 +580,7 @@ void DnnlBlockedMemoryDesc::initStrides() {
const size_t total_ndims = outer_ndims + inner_ndims;
// strides of inner dims. In case of 4i16o4i will be {64, 4, 1}
VectorDims inner_strides(inner_ndims, 1);
VectorDims inner_strides(inner_ndims, getShape().hasZeroDims() ? 0 : 1);
for (size_t i = 1; i < blk_desc.inner_nblks; i++) {
inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i];
}
@ -600,7 +632,9 @@ void DnnlBlockedMemoryDesc::recomputeDefaultStrides() {
IE_THROW() << "Can't recompute stride: order size != blocked dims size";
auto &oneDnnStrides = desc.data.format_desc.blocking.strides;
if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim val) { return val == Shape::UNDEFINED_DIM; })) {
if (getShape().hasZeroDims()) {
std::fill(std::begin(oneDnnStrides), std::begin(oneDnnStrides) + getShape().getRank(), 0);
} else if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim val) { return val == Shape::UNDEFINED_DIM; })) {
std::fill(std::begin(oneDnnStrides), std::begin(oneDnnStrides) + rank, DNNL_RUNTIME_DIM_VAL);
initStrides();
} else {
@ -633,6 +667,11 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc,
desc = cloneDescWithNewDims(mdesc, shape.getDims(), order);
if (shape.hasZeroDims()) {
auto& blk = desc.data.format_desc.blocking;
std::fill(std::begin(blk.strides), std::begin(blk.strides) + desc.data.ndims, 0);
}
initBlockedParams();
}
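
The offset0 handling in cloneDescWithNewDims() follows a simple save-and-restore pattern; a self-contained sketch with a fake descriptor and a fake fill helper (neither is oneDNN's real API):

    #include <cassert>
    #include <cstdint>

    struct FakeDesc { int64_t offset0; };         // stand-in, not mkldnn::memory::desc

    // Stand-in for dnnl::impl::fill_blocked(), which always resets offset0 to 0.
    void fillBlockedLike(FakeDesc& d) { d.offset0 = 0; }

    FakeDesc cloneKeepingOffset(FakeDesc src) {
        const int64_t savedOffset = src.offset0;  // remember the value the helper clobbers
        fillBlockedLike(src);
        src.offset0 = savedOffset;                // restore it afterwards
        return src;
    }

    int main() {
        assert(cloneKeepingOffset(FakeDesc{42}).offset0 == 42);
    }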

View File

@ -71,7 +71,7 @@ private:
explicit DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc);
// Creates DnnlBlockedMemoryDesc using the shape parameter as a true shape but all other params (layout, blocks, etc.) are used from the mdesc, but
// the mdesc own shape is ignored. The main purpose of this constructor is making dynamic descriptor form some dummy mdesc, which stores info about
// the mdesc own shape is ignored. The main purpose of this constructor is making dynamic descriptor from some dummy mdesc, which stores info about
// layout, blocking, strides, etc., and the provided dynamic shape.
DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc, const Shape& shape);

View File

@ -15,6 +15,10 @@ DnnlMemoryDesc::DnnlMemoryDesc(const mkldnn::memory::desc& desc) :
IE_THROW(Unexpected) << "Memory format any is prohibited!";
}
bool DnnlMemoryDesc::canComputeMemSizeZeroDims() const {
return getShape().hasZeroDims() && desc.data.offset0 != DNNL_RUNTIME_DIM_VAL;
}
size_t DnnlMemoryDesc::getCurrentMemSizeImp() const {
return MKLDNNExtensionUtils::getMemSizeForDnnlDesc(desc);
}

View File

@ -63,6 +63,7 @@ private:
size_t getElementOffset(size_t elemNumber) const override;
bool canComputeMemSizeZeroDims() const override;
size_t getCurrentMemSizeImp() const override;
bool isDefinedImp() const override;
MemoryDescPtr cloneWithNewDimsImp(const VectorDims& dims) const override;

View File

@ -319,7 +319,6 @@ void MKLDNNGraph::InitGraph() {
SortTopologically();
InitDescriptors();
RemoveDroppedEdges();
InitOptimalPrimitiveDescriptors();
@ -385,15 +384,16 @@ void MKLDNNGraph::InitOptimalPrimitiveDescriptors() {
void MKLDNNGraph::ExtractConstantAndExecutableNodes() {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExtractConstantAndExecutableNodes");
for (const auto& graphNode : graphNodes) {
if (graphNode->isConstant())
if (graphNode->isConstant()) {
constantGraphNodes.emplace_back(graphNode);
else if (CPU_DEBUG_CAPS_ALWAYS_TRUE(graphNode->isExecutable()))
} else if (CPU_DEBUG_CAPS_ALWAYS_TRUE(graphNode->isExecutable())) {
/* @todo
* Revise implementation.
* With the current approach it is possible that, with debug_caps enabled,
* we execute a node which is not ready to be executed
*/
executableGraphNodes.emplace_back(graphNode);
}
}
}
@ -793,7 +793,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
// check for empty output blob
if (std::any_of(outDims.begin(), outDims.end(), [](const Dim dim) {return dim == 0;})) {
return;
continue;
}
auto srcPrec = actualDesc.getPrecision();
@ -836,10 +836,11 @@ inline void MKLDNNGraph::ExecuteNode(const MKLDNNNodePtr& node, const mkldnn::st
DUMP(node, infer_count);
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, node->profiling.execute);
if (node->isDynamicNode())
if (node->isDynamicNode()) {
node->executeDynamic(stream);
else
} else {
node->execute(stream);
}
}
void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
@ -855,7 +856,6 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
if (request)
request->ThrowIfCanceled();
ExecuteNode(node, stream);
}
@ -994,22 +994,6 @@ Config MKLDNNGraph::getProperty() const {
return config;
}
Blob::Ptr MKLDNNGraph::getInputBlob(const std::string& name) {
auto itr = inputNodesMap.find(name);
if (itr != inputNodesMap.end()) {
return MemoryDescUtils::interpretAsBlob(itr->second->getChildEdgeAt(0)->getMemory());
}
return nullptr;
}
Blob::Ptr MKLDNNGraph::getOutputBlob(const std::string& name) {
auto itr = outputNodesMap.find(name);
if (itr != outputNodesMap.end()) {
return MemoryDescUtils::interpretAsBlob(itr->second->getParentEdgeAt(0)->getMemory());
}
return nullptr;
}
void MKLDNNGraph::RemoveEdge(MKLDNNEdgePtr& edge) {
for (auto it = graphEdges.begin(); it != graphEdges.end(); it++) {
if ((*it) == edge) {

View File

@ -44,9 +44,6 @@ public:
void setProperty(const std::map<std::string, std::string> &properties);
Config getProperty() const;
InferenceEngine::Blob::Ptr getInputBlob(const std::string& name);
InferenceEngine::Blob::Ptr getOutputBlob(const std::string& name);
template<typename NET>
void CreateGraph(NET &network,
const MKLDNNExtensionManager::Ptr& extMgr,

View File

@ -59,7 +59,7 @@ MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {}
void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "ApplyCommonGraphOptimizations", "FuseConvolutionAndBias");
FuseConvolutionAndBias(graph);
FuseConvolutionMatMulAndBias(graph);
graph.RemoveDroppedNodes();
OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMultiplyAndAdd");
@ -166,37 +166,38 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
graph.RemoveDroppedEdges();
}
void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) {
void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();
auto isSuitableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == Convolution &&
auto isSuitableParentNode = [](const MKLDNNNodePtr& node) {
return (node->getType() == Convolution || node->getType() == MatMul) &&
node->getChildEdges().size() == 1 &&
node->getParentEdges().size() == 2 &&
node->getFusedWith().empty();
};
auto isSuitableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
auto isSuitableChildNode = [&](const MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) {
if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2)
return false;
auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent();
const auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent();
if (biasNode->getType() != Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1)
return false;
auto convOutDims = parentNode->getOutputShapeAtPort(0).getDims();
auto biasDims = getNormalizedDimsBySize(biasNode->getOutputShapeAtPort(0).getDims(),
convOutDims.size());
const auto parentOutDims = parentNode->getOutputShapeAtPort(0).getDims();
const auto biasDims = getNormalizedDimsBySize(biasNode->getOutputShapeAtPort(0).getDims(),
parentOutDims.size());
// TODO [NM]: Legacy ConvBias fusion transformation supports both per-tensor (via explicit broadcasting) and per-channel cases.
// Most real models contain per-channel bias, so we need to re-evaluate the need to support the per-tensor variant.
if (convOutDims.size() != biasDims.size() || biasDims.size() < 2)
if (parentOutDims.size() != biasDims.size() || biasDims.size() < 2)
return false;
if (biasDims[0] != 1 || !dimsEqualStrong(biasDims[1], convOutDims[1]))
const auto channelAxis = parentNode->getFusingAxis();
if (!dimsEqualStrong(biasDims[channelAxis], parentOutDims[channelAxis]))
return false;
for (int i = 2; i < biasDims.size(); i++) {
if (biasDims[i] != 1)
for (int i = 0; i < biasDims.size(); i++) {
if (biasDims[i] != 1 && i != channelAxis)
return false;
}
@ -262,13 +263,13 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) {
graph.RemoveEdge(remEdge);
}
auto parentEltwise = parentNode;
const auto& parentEltwise = parentNode;
MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size()));
auto &graphEdges = graph.GetEdges();
auto& graphEdges = graph.GetEdges();
graphEdges.push_back(newEdge);
parent->addEdge(newEdge);
auto partialShape = { parentEltwise->outputShapes[0].toPartialShape()[1] };
auto partialShape = { parentEltwise->outputShapes[0].toPartialShape()[parentEltwise->getFusingAxis()] };
parent->outputShapes[inNum] = Shape(partialShape);
parentEltwise->inputShapes.push_back(parent->outputShapes[0]);
}
@ -627,7 +628,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
}
}
static bool BF16QuantizeNodeFusing(MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
/**
* @todo FQ fusing was disabled for BF16 output since oneDNN primitives lack support
* for bf16 depthwise postops.
* This is not the case anymore, because after migration to oneDNN 2.3 FQ will be fused as
* multiple binary post ops.
* This check can already be removed for FC fusing, but should be kept for Convolution,
* which still uses legacy depthwise postops for performance reasons.
*/
static bool BF16QuantizeNodeFusing(const MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) {
return childNode->getType() == FakeQuantize &&
one_of(Precision::BF16,
parentNode->getOriginalOutputPrecisionAtPort(0),
@ -638,7 +647,7 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra
auto& graphNodes = graph.GetNodes();
auto isSuitableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getInputShapeAtPort(0).getRank() != 3;
return node->getType() == FullyConnected && node->getChildEdges().size() == 1;
};
auto parent = graphNodes.begin();

View File

@ -19,7 +19,7 @@ public:
void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph);
private:
void FuseConvolutionAndBias(MKLDNNGraph &graph);
void FuseConvolutionMatMulAndBias(MKLDNNGraph &graph);
void FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph);
void FuseMultiplyAndAdd(MKLDNNGraph &graph);
void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph);

View File

@ -190,8 +190,9 @@ void MKLDNNPlugin::MKLDNNInferRequest::redefineMemoryForInputNodes() {
const auto inputNode = cpuInputNodes.find(blob.first);
if (inputNode == cpuInputNodes.end())
IE_THROW() << "CPU execution graph doesn't contain input node with name: " << blob.first;
if (inputNode->second->isDynamicNode())
if (inputNode->second->isDynamicNode()) {
inputNode->second->redefineOutputMemory({blob.second->getTensorDesc().getDims()});
}
}
}

View File

@ -4,6 +4,7 @@
#include "mkldnn_node.h"
#include "dnnl_debug.h"
#include "mkldnn_edge.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn_itt.h"
@ -83,7 +84,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
for (size_t i = 0; i < op->get_input_size(); i++) {
const auto &shape = op->get_input_partial_shape(i);
if (shape.rank().is_dynamic()) {
IE_THROW(Unexpected) << "CPU plug-in doesn't support operation with dynamic rank";
IE_THROW(Unexpected) << "CPU plug-in doesn't support " << getTypeStr() << " operation with dynamic rank. Operation name: " << getName();
}
bool isScalar = shape.rank().get_length() == 0;
@ -98,7 +99,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
for (size_t i = 0; i < op->get_output_size(); i++) {
const auto &shape = op->get_output_partial_shape(i);
if (shape.rank().is_dynamic()) {
IE_THROW(Unexpected) << "CPU plug-in doesn't support operation with dynamic rank";
IE_THROW(Unexpected) << "CPU plug-in doesn't support " << getTypeStr() << " operation with dynamic rank. Operation name: " << getName();
}
bool isScalar = shape.rank().get_length() == 0;
@ -229,6 +230,15 @@ bool MKLDNNNode::isEdgesEmpty(const std::vector<MKLDNNEdgeWeakPtr>& edges) const
return true;
}
void MKLDNNNode::createPrimitive() {
if (inputShapesDefined() && isExecutable()) {
if (needPrepareParams()) {
prepareParams();
}
updateLastInputDims();
}
}
void MKLDNNNode::selectOptimalPrimitiveDescriptor() {
selectPreferPrimitiveDescriptor(getPrimitivesPriority(), false);
}
@ -509,12 +519,14 @@ void MKLDNNNode::executeDynamic(mkldnn::stream strm) {
if (needShapeInfer()) {
redefineOutputMemory(shapeInfer());
}
if (needPrepareParams()) {
IE_ASSERT(inputShapesDefined()) << "Can't prepare params for " << getTypeStr() << " node with name: " << getName() <<
" since the input shapes are not defined.";
prepareParams();
if (isExecutable()) {
if (needPrepareParams()) {
IE_ASSERT(inputShapesDefined()) << "Can't prepare params for " << getTypeStr() << " node with name: " << getName() <<
" since the input shapes are not defined.";
prepareParams();
}
executeDynamicImpl(strm);
}
executeDynamicImpl(strm);
updateLastInputDims();
}
@ -716,7 +728,7 @@ void MKLDNNNode::initDescriptor(const NodeConfig& config) {
selectedPD->setConfig(rightConfig);
}
void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd) {
void MKLDNNNode::prepareMemory(mkldnn::primitive_desc_iterator& itpd) {
for (size_t i = 0; i < getChildEdges().size(); i++) {
auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
@ -1048,6 +1060,18 @@ void MKLDNNNode::setDynamicBatchLim(int lim) {
}
}
void MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr,
std::unordered_map<int, mkldnn::memory>& primArgs,
const std::vector<MKLDNNMemoryPtr>& binaryPostOpsArgs) {
auto post_ops = attr.get_post_ops();
int idx = 0;
for (int i = 0; i < post_ops.len(); i++) {
if (post_ops.kind(i) == mkldnn::primitive::kind::binary) {
primArgs.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, binaryPostOpsArgs[idx++]->GetPrimitive()});
}
}
}
bool MKLDNNNode::isFusedWith(Type fusedNodeType) const {
for (auto fusedNode : fusedWith) {
if (fusedNode->type == fusedNodeType)
@ -1078,10 +1102,14 @@ Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) {
}
}
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
IE_THROW() << "Fusing of " << this->getType() << " operation is not implemented";
}
void MKLDNNNode::appendBinPostOps(mkldnn::post_ops& ops, const std::vector<size_t>& binaryShape, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) {
IE_THROW() << "Binary fusing of " << this->getType() << " operation is not implemented";
}
std::vector<InferenceEngine::Precision> MKLDNNNode::getInputPrecisions() const {
std::vector<InferenceEngine::Precision> inputPrecisions;
for (size_t i = 0; i < getParentEdges().size(); i++) {
@ -1205,6 +1233,9 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr<ngraph::Node>
bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const {
size_t fusingPort = 0;
// @todo graph optimizer can provide parentNode as nullptr. Should be avoided
const size_t channelAxis = parentNode ? parentNode->getFusingAxis() : MKLDNNNode::getFusingAxis();
for (size_t i = (parentNode == nullptr ? 1 : 0); i < getParentEdges().size(); i++) {
MKLDNNNode *node = getParentEdgesAtPort(i)[0]->getParent().get();
if (node == nullptr) {
@ -1225,7 +1256,8 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
if (i == fusingPort)
continue;
auto& weightShape = getInputShapeAtPort(i).getDims();
if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, true))
if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 ||
!isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, channelAxis, true))
return false;
}
return true;
@ -1246,6 +1278,9 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
|| isConvertablePowerStatic();
}
// @todo shifts for Subtract and scales for Divide are replaced with
// Add (with opposite sign) and Multiply (with inverse value) for legacy depthwise post ops
// This can be avoided after depthwise post ops are gone
std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts(const MKLDNNNode *parentNode) const {
std::vector<float> scales, shifts;
@ -1309,6 +1344,36 @@ std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts
return {scales, shifts};
}
bool MKLDNNNode::isInputTensorAtPortEmpty(size_t port) const {
if (inputShapes.size() <= port) {
IE_THROW() << "Incorrect input port number for node " << getName();
}
return getParentEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims();
}
bool MKLDNNNode::isOutputTensorAtPortEmpty(size_t port) const {
if (outputShapes.size() <= port) {
IE_THROW() << "Incorrect output port number for node " << getName();
}
return getChildEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims();
}
bool MKLDNNNode::hasEmptyInputTensors() const {
for (size_t i = 0; i < getParentEdges().size(); i++) {
if (isInputTensorAtPortEmpty(i))
return true;
}
return false;
}
bool MKLDNNNode::hasEmptyOutputTensors() const {
for (size_t i = 0; i < outputShapes.size(); i++) {
if (isOutputTensorAtPortEmpty(i))
return true;
}
return false;
}
bool MKLDNNNode::inputShapesDefined() const {
for (size_t i = 0; i < getParentEdges().size(); i++) {
if (!getParentEdgesAtPort(i)[0]->getMemory().getDesc().isDefined())
@ -1382,8 +1447,11 @@ std::vector<VectorDims> MKLDNNNode::shapeInferGeneric(const std::vector<Shape>&
std::vector<VectorDims> newOutputShapes(opToShapeInfer->get_output_size());
for (size_t i = 0; i < newOutputShapes.size(); i++) {
const auto &partShape = opToShapeInfer->get_output_partial_shape(i);
if (partShape.is_dynamic())
IE_THROW(NotImplemented) << "CPU plug-in doesn't support default shape infer for nodes with internal dynamism";
if (partShape.is_dynamic()) {
IE_THROW(NotImplemented) << "CPU plug-in doesn't support default shape infer for node " << getTypeStr()
<< " with internal dynamism. Operation name: " << getName();
}
newOutputShapes[i] = partShape.get_shape();
}
return newOutputShapes;
@ -1408,10 +1476,11 @@ bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const {
}
return ret;
} else if (node->getType() == Eltwise) {
return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) ||
node->canBePerformedAsScaleShift(this);
return one_of(node->getAlgorithm(),
EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) ||
node->canBePerformedAsScaleShift(this);
}
return false;
}
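
A toy model of the control flow that executeDynamic() now follows, showing how empty (zero-dim) inputs skip prepareParams() and the kernel while shape inference and bookkeeping still run (ToyNode is purely illustrative):

    #include <iostream>

    struct ToyNode {
        bool hasEmptyInput = false;
        bool needShapeInfer = true;
        bool needPrepareParams = true;

        bool isExecutable() const { return !hasEmptyInput; }

        void executeDynamic() {
            if (needShapeInfer)
                std::cout << "redefine output memory\n";
            if (isExecutable()) {               // empty tensors short-circuit here
                if (needPrepareParams)
                    std::cout << "prepare params\n";
                std::cout << "execute kernel\n";
            }
            std::cout << "update last input dims\n";
        }
    };

    int main() {
        ToyNode node;
        node.hasEmptyInput = true;
        node.executeDynamic();                  // only shape infer + bookkeeping run
    }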

View File

@ -199,11 +199,19 @@ public:
// must be called only after MKLDNNGraph::InitEdges()
virtual bool isExecutable() const {
return true;
return !hasEmptyInputTensors();
}
bool isConstant();
virtual size_t getFusingAxis() const {
return 1;
}
static void appendPostOpArgs(const mkldnn::primitive_attr& attr,
std::unordered_map<int, mkldnn::memory>& primArgs,
const std::vector<MKLDNNMemoryPtr>& binaryPostOpsArgs);
bool isFusedWith(Type type) const;
void addFusedNode(const MKLDNNNodePtr &fusingNode) {
@ -362,7 +370,7 @@ public:
*/
virtual void filterSupportedPrimitiveDescriptors();
virtual void createPrimitive() = 0;
virtual void createPrimitive();
virtual void selectOptimalPrimitiveDescriptor();
virtual void initOptimalPrimitiveDescriptor();
@ -419,7 +427,7 @@ public:
if (impl_type == selected_pd->getImplementationType() &&
descsCompatible(srcDescs, selected_pd->getConfig().inConfs) &&
descsCompatible(dstDescs, selected_pd->getConfig().outConfs)) {
prepareMemory(selected_pd, itpd);
prepareMemory(itpd);
PD prim_desc = createPd<PD, D, FPD>(desc);
return {itpd.get()};
}
@ -594,8 +602,10 @@ protected:
* Seed node should call this routine and pass its post operations list as parameter.
* @param ops List of fused post operations
*/
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false);
virtual AttrPtr initPrimitiveAttr() const { return nullptr; }
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, int align = -1);
virtual void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem);
virtual std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() { return nullptr; }
typedef std::function<DnnlMemoryDescPtr (mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx)>
GetPrimitiveMemoryFormatFunc;
@ -636,7 +646,7 @@ protected:
std::vector<MKLDNNMemoryPtr> internalBlobMemory;
std::vector<NodeDesc> supportedPrimitiveDescriptors;
std::unordered_map<int, mkldnn::memory> primArgs;
std::vector<mkldnn::memory> binaryPostOpsArgs;
std::vector<MKLDNNMemoryPtr> binaryPostOpsArgs;
MKLDNNPrimitive prim;
std::vector<MKLDNNDescriptor> descs;
@ -714,8 +724,16 @@ protected:
supportedPrimitiveDescriptors.push_back({config, implType});
}
void prepareMemory(mkldnn::primitive_desc_iterator& itpd);
bool isDynamic = false;
bool isInputTensorAtPortEmpty(size_t port) const;
bool isOutputTensorAtPortEmpty(size_t port) const;
bool hasEmptyInputTensors() const;
bool hasEmptyOutputTensors() const;
bool inputShapesDefined() const;
bool outputShapesDefined() const;
bool shapesDefined() const;
@ -738,6 +756,7 @@ protected:
}
std::vector<VectorDims> lastInputDims = {};
std::shared_ptr<ngraph::Node> opToShapeInfer;
private:
@ -780,7 +799,6 @@ private:
return PD(*selected_desc_ptr, engine);
}
void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd);
enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 };
ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes);

View File

@ -80,6 +80,7 @@
#include "nodes/mkldnn_reduce_node.h"
#include "nodes/mkldnn_if_node.h"
#include "nodes/mkldnn_ctc_greedy_decoder_node.h"
#include "nodes/mkldnn_non_zero.h"
#define MKLDNN_NODE(__prim, __type) \
registerNodeIfRequired(MKLDNNPlugin, __prim, __type, MKLDNNNodeImpl<__prim>)
@ -168,4 +169,5 @@ MKLDNNPlugin::MKLDNNNode::NodesFactory::NodesFactory()
MKLDNN_NODE(MKLDNNTopKNode, TopK);
MKLDNN_NODE(MKLDNNStridedSliceNode, StridedSlice);
MKLDNN_NODE(MKLDNNGRNNode, GRN);
MKLDNN_NODE(MKLDNNNonZeroNode, NonZero);
}

View File

@ -504,23 +504,24 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "Engine::LoadExeNetworkImpl");
// verification of supported input
InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
for (const auto &ii : _networkInputs) {
for (const auto &ii : network.getInputsInfo()) {
auto input_precision = ii.second->getPrecision();
if (input_precision != InferenceEngine::Precision::FP64 &&
input_precision != InferenceEngine::Precision::FP32 &&
input_precision != InferenceEngine::Precision::I32 &&
input_precision != InferenceEngine::Precision::U32 &&
input_precision != InferenceEngine::Precision::U16 &&
input_precision != InferenceEngine::Precision::I16 &&
input_precision != InferenceEngine::Precision::I8 &&
input_precision != InferenceEngine::Precision::U8 &&
input_precision != InferenceEngine::Precision::BF16 &&
input_precision != InferenceEngine::Precision::BOOL &&
input_precision != InferenceEngine::Precision::I64 &&
input_precision != InferenceEngine::Precision::U64) {
using hash_t = std::hash<typename std::underlying_type<Precision::ePrecision>::type>;
static const std::unordered_set<Precision::ePrecision, hash_t> supported_precisions = {
Precision::U8, Precision::I8,
Precision::U16, Precision::I16,
Precision::U32, Precision::I32,
Precision::U64, Precision::I64,
Precision::BF16, Precision::FP16,
Precision::FP32, Precision::FP64,
Precision::BOOL
};
if (!supported_precisions.count(input_precision)) {
IE_THROW(NotImplemented)
<< "Input image format " << input_precision << " is not supported yet...";
<< "Input image format " << input_precision << " is not supported yet...";
}
}
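
A condensed, self-contained version of the lookup-set validation pattern introduced above (the enum and function below are reduced stand-ins, not the real Precision class):

    #include <functional>
    #include <stdexcept>
    #include <type_traits>
    #include <unordered_set>

    enum Precision { U8, I8, U16, I16, U32, I32, U64, I64, BF16, FP16, FP32, FP64, BOOL, CUSTOM };

    void checkInputPrecision(Precision p) {
        using hash_t = std::hash<std::underlying_type<Precision>::type>;
        static const std::unordered_set<Precision, hash_t> supported = {
            U8, I8, U16, I16, U32, I32, U64, I64, BF16, FP16, FP32, FP64, BOOL};
        if (!supported.count(p))
            throw std::runtime_error("Input image format is not supported yet...");
    }

    int main() {
        checkInputPrecision(FP32);              // passes
        try {
            checkInputPrecision(CUSTOM);        // reported as unsupported
        } catch (const std::exception&) {
        }
    }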

View File

@ -18,7 +18,6 @@ public:
operator bool() const;
MKLDNNPrimitive& operator=(const std::shared_ptr<mkldnn::primitive>& primitive);
mkldnn::primitive operator*();
void reset(mkldnn::primitive* primitive);
private:

View File

@ -36,8 +36,9 @@ MKLDNNPlugin::ConvertMatMulToFC::ConvertMatMulToFC() {
auto rank_a = shape_a.rank().get_length();
auto rank_b = shape_b.rank().get_length();
// Transformation to FC is not supported for 1D second input
if (rank_b == 1) {
// Transformation to FC is not supported for 1D inputs or inputs with rank greater than 3
if (rank_a == 1 || rank_b == 1 ||
rank_a > 3 || rank_b > 3) {
return false;
}
@ -47,7 +48,6 @@ MKLDNNPlugin::ConvertMatMulToFC::ConvertMatMulToFC() {
std::count_if(shape_b.begin(), shape_b.end(), [](ngraph::Dimension x) { return x != 1; }) > 2) {
return false;
}
/*
* get_aligned_shapes function aligns two input shapes to have the same size and
* the same batch dimensions (last two dimensions are not comparable).

View File

@ -7,7 +7,6 @@
#include "ngraph/op/fake_quantize.hpp"
#include "ngraph/pass/manager.hpp"
#include "reshape_fc_fusion.hpp"
#include "reshape_fully_connected.hpp"
#include "align_matmul_input_ranks.hpp"
#include "reshape_prelu.hpp"
#include "convert_broadcast_to_tiles.hpp"
@ -29,7 +28,6 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphF
manager.register_pass<AlignMatMulInputRanks>();
manager.register_pass<ConvertTileToSeqTiles>();
manager.register_pass<FullyConnectedBiasFusion>();
manager.register_pass<ReshapeFullyConnected>();
manager.register_pass<ConvertToPowerStatic>();
manager.register_pass<ConvertToLeakyRelu>();
manager.register_pass<ReshapePRelu>();

View File

@ -1,114 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "reshape_fully_connected.hpp"
#include "op/fully_connected.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <transformations/utils/utils.hpp>
#include <numeric>
NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ReshapeFullyConnected, "ReshapeFullyConnected", 0);
MKLDNNPlugin::ReshapeFullyConnected::ReshapeFullyConnected() {
ngraph::OutputVector twoInputs = {
ngraph::pattern::any_input(ngraph::pattern::has_static_rank()), ngraph::pattern::any_input(ngraph::pattern::has_static_shape())};
ngraph::OutputVector threeInputs = {
ngraph::pattern::any_input(ngraph::pattern::has_static_rank()), ngraph::pattern::any_input(ngraph::pattern::has_static_shape()),
ngraph::pattern::any_input()};
auto fcTwoInputs = ngraph::pattern::wrap_type<MKLDNNPlugin::FullyConnectedNode>(twoInputs, ngraph::pattern::has_static_rank());
auto fcThreeInputs = ngraph::pattern::wrap_type<MKLDNNPlugin::FullyConnectedNode>(threeInputs, ngraph::pattern::has_static_rank());
const auto fcTwoOrThreeInputs = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{fcTwoInputs, fcThreeInputs});
ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) {
auto fc = std::dynamic_pointer_cast<MKLDNNPlugin::FullyConnectedNode>(m.get_match_root());
if (!fc || transformation_callback(fc)) {
return false;
}
auto fc_input_shape = fc->get_input_partial_shape(0);
auto input_rank = fc_input_shape.rank().get_length();
auto output_shape = fc->get_output_partial_shape(0);
if (input_rank == 2 || input_rank == 0) {
return false;
}
ngraph::NodeVector new_ops;
int64_t K = *(fc->get_input_shape(1).rbegin()); // requested 2nd input with static shape in the matcher
auto reshape = std::make_shared<ngraph::opset1::Reshape>(
fc->input_value(0), ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{-1, K}), false);
if (reshape->get_output_partial_shape(0).rank().is_dynamic())
return false;
new_ops.push_back(reshape);
reshape->set_friendly_name(fc->get_friendly_name() + "/Reshape");
// Calculate output shape for new FullyConnected layer
// [I, K] * [O, K] = [I, O]
auto I = reshape->get_output_partial_shape(0)[0];
auto O = fc->get_input_partial_shape(1)[0];
ngraph::PartialShape output_shape_new{I, O};
std::shared_ptr<ngraph::Node> fc_new;
if (fc->get_input_size() == 2) {
fc_new = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(reshape,
fc->input_value(1),
output_shape_new.rank(),
fc->get_output_type());
} else if (fc->get_input_size() == 3) {
fc_new = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(reshape,
fc->input_value(1),
fc->input_value(2),
output_shape_new.rank(),
fc->get_output_type());
} else {
return false;
}
new_ops.push_back(fc_new);
if (output_shape != output_shape_new) {
auto I_idxs = std::vector<size_t>(input_rank - 1);
std::iota(I_idxs.begin(), I_idxs.end(), 0);
auto A_input_shape = ngraph::op::util::make_try_fold<ngraph::opset7::ShapeOf>(fc->input_value(0));
auto B_input_shape = ngraph::op::util::make_try_fold<ngraph::opset7::ShapeOf>(fc->input_value(1));
auto I_node = ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_node(A_input_shape, {I_idxs});
auto O_node = ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_node(B_input_shape, {0});
ngraph::OutputVector output_shape_dims{I_node, O_node};
const auto original_rank = fc->get_output_rank();
NGRAPH_CHECK(original_rank.is_static());
if (input_rank < original_rank.get_length()) {
const size_t const_shape_value = original_rank.get_length() - input_rank;
output_shape_dims.insert(
output_shape_dims.begin(), ngraph::opset1::Constant::create(I_node->get_element_type(), { const_shape_value }, { 1 }));
}
auto reshape_output_shape = ngraph::op::util::make_try_fold<ngraph::opset1::Concat>(output_shape_dims, 0);
auto reshape_output = std::make_shared<ngraph::opset1::Reshape>(fc_new, reshape_output_shape, false);
new_ops.push_back(A_input_shape);
new_ops.push_back(B_input_shape);
new_ops.push_back(I_node);
new_ops.push_back(O_node);
new_ops.push_back(reshape_output_shape);
new_ops.push_back(reshape_output);
reshape_output->set_friendly_name(fc->get_friendly_name());
fc_new->set_friendly_name(fc->get_friendly_name() + "/FC");
ngraph::copy_runtime_info(fc, new_ops);
ngraph::replace_node(fc, reshape_output);
} else {
fc_new->set_friendly_name(fc->get_friendly_name());
ngraph::copy_runtime_info(fc, new_ops);
ngraph::replace_node(fc, fc_new);
}
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(fcTwoOrThreeInputs, "ReshapeFullyConnected");
this->register_matcher(m, callback);
}

View File

@ -1,25 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
/*
* Description:
* ReshapeFullyConnected transformation detects FullyConnected operations
* and for each operation where input shape is greater than 2 inserts Reshape
* operations before and after FullyConnected operation. This transformation is
* required because of IE restrictions.
*/
namespace MKLDNNPlugin {
class ReshapeFullyConnected: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ReshapeFullyConnected();
};
} // namespace MKLDNNPlugin

View File

@ -4,27 +4,208 @@
#include "cpu_convert.h"
#include "cpu_memcpy.h"
#include "utils/bfloat16.hpp"
#include <utils/bfloat16.hpp>
#include <utils/general_utils.h>
#include <mkldnn_selective_build.h>
#include <ie_parallel.hpp>
#include <openvino/core/type/float16.hpp>
#include <cpu/x64/jit_generator.hpp>
#include <algorithm>
#include <type_traits>
#include <tuple>
#include <ie_parallel.hpp>
#include <cmath>
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using namespace dnnl::impl::cpu::x64;
using namespace dnnl::impl::utils;
using namespace Xbyak;
namespace {
template<typename srcType, typename dstType>
void convert(const void *srcPtr, void *dstPtr, const size_t size) {
if (std::is_same<srcType, dstType>::value) {
cpu_memcpy(dstPtr, srcPtr, size*sizeof(dstType));
} else {
const srcType *srcData = reinterpret_cast<const srcType *>(srcPtr);
dstType *dstData = reinterpret_cast<dstType *>(dstPtr);
template <typename src_t, typename dst_t>
void convert_vec(jit_generator & gen,
const RegExp & src,
const RegExp & dst);
parallel_for(size, [&](size_t i) {
dstData[i] = static_cast<dstType>(srcData[i]);
template <>
void convert_vec<ov::float16, float>(jit_generator & gen,
const RegExp & src,
const RegExp & dst) {
auto const & f16vec = gen.xmm3;
auto const & f32vec = gen.ymm4;
gen.movdqu(f16vec, gen.xword[src]);
gen.vcvtph2ps(f32vec, f16vec);
gen.vmovups(gen.yword[dst], f32vec);
}
template <>
void convert_vec<float, ov::float16>(jit_generator & gen,
const RegExp & src,
const RegExp & dst) {
auto const & f16vec = gen.xmm3;
auto const & f32vec = gen.ymm4;
gen.vmovups(f32vec, gen.yword[src]);
gen.vcvtps2ph(f16vec, f32vec, 0);
gen.movdqu(gen.xword[dst], f16vec);
}
class jit_convert_array : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_convert_array)
void generate() override {
const size_t vlen = 8u;
const size_t vlen_log2 = 3;
auto reg_src = rax;
auto reg_dst = rbx;
auto reg_sz = rdx;
Label tail, exit;
preamble();
mov(reg_src, ptr[param1 + offsetof(args_t, src)]);
mov(reg_dst, ptr[param1 + offsetof(args_t, out)]);
mov(reg_sz, ptr[param1 + offsetof(args_t, count)]);
xor_(rsi, rsi);
mov(r8, reg_sz);
shr(r8, vlen_log2);
foreach(rsi, 1, r8, [&, this](const Xbyak::Reg64& idx) {
_convert_vec(*this, reg_src, reg_dst);
add(reg_src, _src_size * vlen);
add(reg_dst, _dst_size * vlen);
});
L(tail);
shl(rsi, vlen_log2);
sub(reg_sz, rsi);
test(reg_sz, reg_sz);
jz(exit);
// allocate array for 8 floats on stack
sub(rsp, vlen * sizeof(float));
mov(r8, rsp);
vpxor(ymm4, ymm4, ymm4);
vmovups(yword[r8], ymm4);
// Tail conversion
copy(r8, reg_src, reg_sz, _src_size);
_convert_vec(*this, r8, r8);
copy(reg_dst, r8, reg_sz, _dst_size);
// Free the array on stack
add(rsp, vlen * sizeof(float));
L(exit);
postamble();
}
void foreach(const Xbyak::Reg64& idx,
size_t step,
const Xbyak::Reg64& end,
std::function<void(const Xbyak::Reg64&)> && fn) {
Label loop, exit;
L(loop);
cmp(idx, end);
jge(exit);
fn(idx);
add(idx, step);
jmp(loop);
L(exit);
}
void copy(const Xbyak::Reg64& dst,
const Xbyak::Reg64& src,
const Xbyak::Reg64& size,
size_t item_size) {
push(rsi);
push(r15);
xor_(rsi, rsi);
auto address_frame = [this](size_t size) -> const AddressFrame& {
switch (size) {
case 1: return byte;
case 2: return word;
case 4: return dword;
case 8: return qword;
default:
break;
}
return ptr;
};
const auto & addr_frame = address_frame(item_size);
foreach(rsi, 1, size, [&, this](const Xbyak::Reg64& idx) {
mov(r15, addr_frame[src + idx * item_size]);
mov(addr_frame[dst + idx * item_size], r15);
});
pop(r15);
pop(rsi);
}
public:
typedef struct {
const void* src;
void* out;
const size_t count;
} args_t;
typedef void (*fn_t)(const args_t*);
typedef void (*convert_vec_t)(jit_generator &,
const RegExp &,
const RegExp &);
jit_convert_array(convert_vec_t convert_vec,
size_t src_size,
size_t dst_size)
: _convert_vec(convert_vec)
, _src_size(src_size)
, _dst_size(dst_size) {}
template<typename src_t, typename dst_t>
static fn_t get() {
if (mayiuse(avx2) && cpu().has(util::Cpu::tF16C)) {
static jit_convert_array converter(convert_vec<src_t, dst_t>, sizeof(src_t), sizeof(dst_t));
auto & generator = static_cast<jit_generator&>(converter);
generator.create_kernel();
return (fn_t)generator.jit_ker();
}
return nullptr;
}
private:
convert_vec_t _convert_vec;
size_t _src_size;
size_t _dst_size;
};
template <typename TI, typename TO>
void jit_convert(const TI* arg, TO* out, size_t count) {
using jit_impl = jit_convert_array;
static auto converter = jit_impl::get<TI, TO>();
if (converter) {
typename jit_impl::args_t args = { arg, out, count };
converter(&args);
} else {
for (size_t i = 0; i < count; ++i) {
out[i] = static_cast<TO>(arg[i]);
}
}
}
@ -35,84 +216,391 @@ struct PrecisionInfo {
template <>
struct PrecisionInfo<Precision::BF16> {
using value_type = MKLDNNPlugin::bfloat16_t;
using value_type = bfloat16_t;
};
template <>
struct PrecisionInfo<Precision::FP16> {
using value_type = ov::float16;
};
template <>
struct PrecisionInfo<Precision::BOOL> {
using value_type = uint8_t;
};
template<typename T,
typename U = typename std::conditional<
std::is_same<ov::float16, T>::value
|| std::is_same<bfloat16_t, T>::value,
float, T>::type>
struct Range {
const std::tuple<U, U> & fit(const Precision & prec);
private:
std::tuple<U, U> _range {
std::numeric_limits<T>::lowest(),
std::numeric_limits<T>::max()
};
};
template<typename T, typename U>
const std::tuple<U, U> & Range<T, U>::fit(const Precision & prec) {
if (prec.is_float()) {
double lbound, ubound;
switch (prec) {
case Precision::BF16:
lbound = static_cast<double>(std::numeric_limits<bfloat16_t>::lowest());
ubound = static_cast<double>(std::numeric_limits<bfloat16_t>::max());
break;
case Precision::FP16:
lbound = static_cast<double>(std::numeric_limits<ov::float16>::lowest());
ubound = static_cast<double>(std::numeric_limits<ov::float16>::max());
break;
case Precision::FP32:
lbound = static_cast<double>(std::numeric_limits<float>::lowest());
ubound = static_cast<double>(std::numeric_limits<float>::max());
break;
case Precision::FP64:
lbound = std::numeric_limits<double>::lowest();
ubound = std::numeric_limits<double>::max();
break;
default:
IE_THROW() << "Unsupported precision";
}
std::get<0>(_range) = static_cast<U>(std::max(static_cast<double>(std::get<0>(_range)), lbound));
std::get<1>(_range) = static_cast<U>(std::min(static_cast<double>(std::get<1>(_range)), ubound));
} else {
int64_t lbound;
uint64_t ubound;
switch (prec) {
case Precision::BOOL:
case Precision::U8:
lbound = static_cast<int64_t>(std::numeric_limits<uint8_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<uint8_t>::max());
break;
case Precision::I8:
lbound = static_cast<int64_t>(std::numeric_limits<int8_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<int8_t>::max());
break;
case Precision::U16:
lbound = static_cast<int64_t>(std::numeric_limits<uint16_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<uint16_t>::max());
break;
case Precision::I16:
lbound = static_cast<int64_t>(std::numeric_limits<int16_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<int16_t>::max());
break;
case Precision::U32:
lbound = static_cast<int64_t>(std::numeric_limits<uint32_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<uint32_t>::max());
break;
case Precision::I32:
lbound = static_cast<int64_t>(std::numeric_limits<int32_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
break;
case Precision::U64:
lbound = static_cast<int64_t>(std::numeric_limits<uint64_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<uint64_t>::max());
break;
case Precision::I64:
lbound = static_cast<int64_t>(std::numeric_limits<int64_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
break;
default:
IE_THROW() << "Unsupported precision";
}
using ltype = typename std::conditional<
std::is_floating_point<U>::value,
double, int64_t>::type;
using utype = typename std::conditional<
std::is_floating_point<U>::value,
double, uint64_t>::type;
std::get<0>(_range) = static_cast<U>(std::max(static_cast<ltype>(std::get<0>(_range)), static_cast<ltype>(lbound)));
std::get<1>(_range) = static_cast<U>(std::min(static_cast<utype>(std::get<1>(_range)), static_cast<utype>(ubound)));
}
return _range;
}
struct ConvertContext {
const void *srcPtr;
void *dstPtr;
size_t size;
Precision interimPrc;
Precision dstPrc;
bool converted;
template<typename T>
std::tuple<T, T> range() const {
Range<T> r;
r.fit(interimPrc);
return r.fit(dstPrc);
}
};
template<typename T>
struct ConvertPrecision;
template<typename src_t, typename dst_t>
struct ConvertPrecision<std::tuple<src_t, dst_t>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const src_t *>(ctx.srcPtr);
auto dst = static_cast<dst_t *>(ctx.dstPtr);
src_t lbound, ubound;
std::tie(lbound, ubound) = ctx.range<src_t>();
if (std::is_integral<src_t>::value
|| ctx.interimPrc.is_float()
|| std::is_integral<dst_t>::value) {
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<dst_t>(std::max(std::min(src[i], ubound), lbound));
});
} else {
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<dst_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
});
}
ctx.converted = true;
}
};
template<>
struct ConvertPrecision<std::tuple<float, bfloat16_t>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const float *>(ctx.srcPtr);
auto dst = static_cast<bfloat16_t *>(ctx.dstPtr);
if (ctx.interimPrc.is_float()) {
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<bfloat16_t>(src[i]);
});
} else {
float lbound, ubound;
std::tie(lbound, ubound) = ctx.range<float>();
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<bfloat16_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
});
}
ctx.converted = true;
}
};
template<>
struct ConvertPrecision<std::tuple<bfloat16_t, float>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const bfloat16_t *>(ctx.srcPtr);
auto dst = static_cast<float *>(ctx.dstPtr);
if (ctx.interimPrc.is_float()) {
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<float>(src[i]);
});
} else {
float lbound, ubound;
std::tie(lbound, ubound) = ctx.range<bfloat16_t>();
parallel_for(ctx.size, [&](size_t i) {
dst[i] = std::trunc(std::max(std::min(static_cast<float>(src[i]), ubound), lbound));
});
}
ctx.converted = true;
}
};
template<typename src_t>
struct ConvertPrecision<std::tuple<src_t, ov::float16>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const src_t *>(ctx.srcPtr);
auto dst = static_cast<ov::float16 *>(ctx.dstPtr);
constexpr size_t batch = 64;
const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
typedef float batch_type[batch];
src_t lbound, ubound;
std::tie(lbound, ubound) = ctx.range<src_t>();
if (std::is_integral<src_t>::value
|| ctx.interimPrc.is_float()) {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
for (size_t j = 0; j < current_batch_size; ++j) // src_t -> fp32
tmp[j] = static_cast<float>(std::max(std::min(src[offset + j], ubound), lbound));
jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
});
} else {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
for (size_t j = 0; j < current_batch_size; ++j) // src_t -> fp32
tmp[j] = static_cast<float>(std::trunc(std::max(std::min(src[offset + j], ubound), lbound)));
jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
});
}
ctx.converted = true;
}
};
template<typename dst_t>
struct ConvertPrecision<std::tuple<ov::float16, dst_t>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
auto dst = static_cast<dst_t *>(ctx.dstPtr);
constexpr size_t batch = 64;
const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
typedef float batch_type[batch];
float lbound, ubound;
std::tie(lbound, ubound) = ctx.range<ov::float16>();
if (ctx.interimPrc.is_float()
|| std::is_integral<dst_t>::value) {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
for (size_t j = 0; j < current_batch_size; ++j) // fp32 -> dst_t
dst[offset + j] = static_cast<dst_t>(std::max(std::min(tmp[j], ubound), lbound));
});
} else {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
for (size_t j = 0; j < current_batch_size; ++j) // fp32 -> dst_t
dst[offset + j] = static_cast<dst_t>(std::trunc(std::max(std::min(tmp[j], ubound), lbound)));
});
}
ctx.converted = true;
}
};
template<>
struct ConvertPrecision<std::tuple<ov::float16, ov::float16>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
auto dst = static_cast<ov::float16 *>(ctx.dstPtr);
constexpr size_t batch = 64;
const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
typedef float batch_type[batch];
float lbound, ubound;
std::tie(lbound, ubound) = ctx.range<ov::float16>();
if (ctx.interimPrc.is_float()) {
cpu_memcpy(dst, src, ctx.size * sizeof(ov::float16));
} else {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
for (size_t j = 0; j < current_batch_size; ++j) // truncate fp32
tmp[j] = std::trunc(std::max(std::min(tmp[j], ubound), lbound));
jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
});
}
ctx.converted = true;
}
};
bool isConversionTruncatesRange(const Precision & from, const Precision & to) {
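// e.g. FP32 -> I32 truncates (float to integral) and I8 -> U8 truncates (signed vs. unsigned),
// while I8 -> I32 does not (illustrative cases)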
return to.bitsSize() < from.bitsSize()
|| (from.is_float() && !to.is_float()) // float -> integral
|| (from.isSigned() != to.isSigned()) // signed <-> unsigned
|| (to == Precision::BOOL && from != to); // T -> bool
}
} // namespace
#define MKLDNN_CVT(ST, DT) OV_CASE2(Precision::ST, Precision::DT, PrecisionInfo<Precision::ST>::value_type, PrecisionInfo<Precision::DT>::value_type)
#define MKLDNN_CVT_LIST \
MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16), MKLDNN_CVT(U8, U32), \
MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64), MKLDNN_CVT(U8, FP32), \
MKLDNN_CVT(U8, FP16), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, FP64), MKLDNN_CVT(U8, BOOL), \
MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16), MKLDNN_CVT(I8, U32), \
MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64), MKLDNN_CVT(I8, FP32), \
MKLDNN_CVT(I8, FP16), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, FP64), MKLDNN_CVT(I8, BOOL), \
MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16), MKLDNN_CVT(U16, U32), \
MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64), MKLDNN_CVT(U16, FP32), \
MKLDNN_CVT(U16, FP16), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, FP64), MKLDNN_CVT(U16, BOOL), \
MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16), MKLDNN_CVT(I16, U32), \
MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64), MKLDNN_CVT(I16, FP32), \
MKLDNN_CVT(I16, FP16), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, FP64), MKLDNN_CVT(I16, BOOL), \
MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16), MKLDNN_CVT(U32, I16), \
MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64), MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), \
MKLDNN_CVT(U32, FP16), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, FP64), MKLDNN_CVT(U32, BOOL), \
MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16), MKLDNN_CVT(I32, I16), \
MKLDNN_CVT(I32, U32), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64), MKLDNN_CVT(I32, FP32), \
MKLDNN_CVT(I32, FP16), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, FP64), MKLDNN_CVT(I32, BOOL), \
MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16), MKLDNN_CVT(U64, I16), \
MKLDNN_CVT(U64, U32), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64), MKLDNN_CVT(U64, FP32), \
MKLDNN_CVT(U64, FP16), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, FP64), MKLDNN_CVT(U64, BOOL), \
MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16), MKLDNN_CVT(I64, I16), \
MKLDNN_CVT(I64, U32), MKLDNN_CVT(I64, I32), MKLDNN_CVT(I64, U64), MKLDNN_CVT(I64, FP32), \
MKLDNN_CVT(I64, FP16), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, FP64), MKLDNN_CVT(I64, BOOL), \
MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16), MKLDNN_CVT(FP32, I16), \
MKLDNN_CVT(FP32, U32), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64), MKLDNN_CVT(FP32, I64), \
MKLDNN_CVT(FP32, FP16), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, FP64), MKLDNN_CVT(FP32, BOOL), \
MKLDNN_CVT(FP16, U8), MKLDNN_CVT(FP16, I8), MKLDNN_CVT(FP16, U16), MKLDNN_CVT(FP16, I16), \
MKLDNN_CVT(FP16, U32), MKLDNN_CVT(FP16, I32), MKLDNN_CVT(FP16, U64), MKLDNN_CVT(FP16, I64), \
MKLDNN_CVT(FP16, FP32), MKLDNN_CVT(FP16, BF16), MKLDNN_CVT(FP16, FP64), MKLDNN_CVT(FP16, BOOL), \
MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16), MKLDNN_CVT(BF16, I16), \
MKLDNN_CVT(BF16, U32), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64), MKLDNN_CVT(BF16, I64), \
MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, FP16), MKLDNN_CVT(BF16, FP64), MKLDNN_CVT(BF16, BOOL), \
MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16), MKLDNN_CVT(FP64, I16), \
MKLDNN_CVT(FP64, U32), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64), MKLDNN_CVT(FP64, I64), \
MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, FP16), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL), \
MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16), MKLDNN_CVT(BOOL, I16), \
MKLDNN_CVT(BOOL, U32), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64), MKLDNN_CVT(BOOL, I64), \
MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, FP16), MKLDNN_CVT(BOOL, BF16), MKLDNN_CVT(BOOL, FP64), \
MKLDNN_CVT(U8, U8), MKLDNN_CVT(I8, I8), MKLDNN_CVT(U16, U16), MKLDNN_CVT(I16, I16), \
MKLDNN_CVT(U32, U32), MKLDNN_CVT(I32, I32), MKLDNN_CVT(U64, U64), MKLDNN_CVT(I64, I64), \
MKLDNN_CVT(FP32, FP32), MKLDNN_CVT(FP16, FP16), MKLDNN_CVT(BF16, BF16), MKLDNN_CVT(FP64, FP64), \
MKLDNN_CVT(BOOL, BOOL)
void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) {
cpu_convert(srcPtr, dstPtr, srcPrc, dstPrc, dstPrc, size);
}
void cpu_convert(const void *srcPtr,
void *dstPtr,
InferenceEngine::Precision srcPrc,
InferenceEngine::Precision interimPrc,
InferenceEngine::Precision dstPrc,
const size_t size) {
if (srcPtr == nullptr || dstPtr == nullptr)
IE_THROW() << "cpu_convert has null data pointer";
if (srcPrc == dstPrc && srcPrc == interimPrc) {
cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size());
} else {
ConvertContext ctx = {
srcPtr,
dstPtr,
size,
interimPrc,
dstPrc,
false
};
OV_SWITCH(MKLDNNPlugin, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), MKLDNN_CVT_LIST);
if (!ctx.converted)
IE_THROW() << "cpu_convert can't convert from: " << srcPrc << " precision to: " << dstPrc;
}
}
#undef MKLDNN_CVT
#undef MKLDNN_CVT_LIST

View File

@ -19,5 +19,32 @@
* number of elements in buffers to be converted
* @return none.
*/
void cpu_convert(const void *srcPtr,
void *dstPtr,
InferenceEngine::Precision srcPrc,
InferenceEngine::Precision dstPrc,
const size_t size);
/**
* @brief Copies size elements from the buffer pointed to by srcPtr into the buffer pointed to by dstPtr.
* If the precisions srcPrc and dstPrc are different, a conversion from srcPrc to dstPrc is performed.
* @param srcPtr
* pointer to the buffer to convert from
* @param dstPtr
* pointer to the buffer to convert to
* @param srcPrc
* precision of the source buffer
* @param interimPrc
* intermediate precision used for type truncation
* @param dstPrc
* precision of the destination buffer
* @param size
* number of elements in buffers to be converted
* @return none.
*/
void cpu_convert(const void *srcPtr,
void *dstPtr,
InferenceEngine::Precision srcPrc,
InferenceEngine::Precision interimPrc,
InferenceEngine::Precision dstPrc,
const size_t size);
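A minimal usage sketch of the two overloads above. The buffer values, the include paths, and the helper name are illustrative assumptions rather than part of these sources; it only shows that the extra interimPrc argument clamps/truncates values to the intermediate range before they are stored with the destination precision.
```cpp
#include <cstdint>
#include <vector>
#include <ie_precision.hpp>
#include "cpu_convert.h"  // assumed include path for the declarations above

void cpu_convert_usage_sketch() {
    std::vector<float> src = {-300.5f, -1.9f, 0.f, 1.9f, 2.5f, 300.f};
    std::vector<int32_t> dst(src.size());

    // Plain conversion: the wrapper passes dstPrc as the interim precision,
    // so values are only clamped to the I32 range and truncated toward zero.
    cpu_convert(src.data(), dst.data(),
                InferenceEngine::Precision::FP32,
                InferenceEngine::Precision::I32,
                src.size());

    // Truncating conversion: values are first clamped to the I8 range,
    // e.g. -300.5f -> -128 and 300.f -> 127, before being written as I32.
    cpu_convert(src.data(), dst.data(),
                InferenceEngine::Precision::FP32,
                InferenceEngine::Precision::I8,
                InferenceEngine::Precision::I32,
                src.size());
}
```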

View File

@ -0,0 +1,45 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "dnnl_executor.h"
using namespace mkldnn;
using namespace MKLDNNPlugin;
DnnlExecutor::IntermReorder::IntermReorder(const mkldnn::memory::desc& descSrc,
const mkldnn::memory::desc& descDst,
const mkldnn::engine& engine) : m_descSrc(descSrc), m_descDst(descDst) {
auto reorderPd = mkldnn::reorder::primitive_desc(engine, descSrc, engine, descDst);
m_reorder = mkldnn::reorder(reorderPd);
}
void DnnlExecutor::IntermReorder::exec(mkldnn::memory& memSrc, mkldnn::memory& memDst, mkldnn::stream strm) {
m_reorder.execute(strm, memSrc, memDst);
}
void DnnlExecutor::exec(std::unordered_map<int, mkldnn::memory> primArgs, mkldnn::stream strm) {
for (auto &inReorder : inputReorders) {
if (primArgs.count(inReorder.first)) {
mkldnn::memory memDst(inReorder.second.getDstDesc(), strm.get_engine());
inReorder.second.exec(primArgs[inReorder.first], memDst, strm);
primArgs[inReorder.first] = memDst;
} else {
IE_THROW() << "DnnlExecutor has reorder for input " << inReorder.first << ", but doesn't have source memory";
}
}
std::unordered_map<int, mkldnn::memory> outputMem;
for (auto &outReorder : outputReorders) {
if (primArgs.count(outReorder.first)) {
mkldnn::memory memSrc(outReorder.second.getSrcDesc(), strm.get_engine());
outputMem[outReorder.first] = primArgs[outReorder.first];
primArgs[outReorder.first] = memSrc;
} else {
IE_THROW() << "DnnlExecutor has reorder for output " << outReorder.first << ", but doesn't have destination memory";
}
}
(*execPrim).execute(strm, primArgs);
for (auto &outReorder : outputReorders) {
outReorder.second.exec(primArgs[outReorder.first], outputMem[outReorder.first], strm);
}
}

View File

@ -0,0 +1,39 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "mkldnn_memory.h"
#include "mkldnn_primitive.h"
namespace MKLDNNPlugin {
class DnnlExecutor {
protected:
class IntermReorder {
public:
IntermReorder(const mkldnn::memory::desc& descSrc, const mkldnn::memory::desc& descDst, const mkldnn::engine& engine);
void exec(mkldnn::memory& memSrc, mkldnn::memory& memDst, mkldnn::stream strm);
const mkldnn::memory::desc& getSrcDesc() const { return m_descSrc; }
const mkldnn::memory::desc& getDstDesc() const { return m_descDst; }
private:
mkldnn::reorder m_reorder;
mkldnn::memory::desc m_descSrc;
mkldnn::memory::desc m_descDst;
};
public:
void exec(std::unordered_map<int, mkldnn::memory> primArgs, mkldnn::stream strm);
virtual ~DnnlExecutor() = default;
protected:
DnnlExecutor() = default;
MKLDNNPrimitive execPrim;
// key is the port number for the primitive that needs memory reordering
std::unordered_map<int, IntermReorder> inputReorders;
std::unordered_map<int, IntermReorder> outputReorders;
};
} // namespace MKLDNNPlugin
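A hypothetical sketch of how a subclass is expected to wire the protected members above; it mirrors the ConvolutionExecutor added later in this change. The class and function names here are illustrative, not part of these sources.
```cpp
#include <memory>
#include <unordered_map>
#include "dnnl_executor.h"  // assumed include path for the header above

namespace {

// Illustrative subclass: wraps an already selected convolution primitive
// descriptor and registers reorders only for arguments whose runtime
// memory descriptor differs from what the primitive expects.
class ExampleConvExecutor : public MKLDNNPlugin::DnnlExecutor {
public:
    ExampleConvExecutor(const mkldnn::convolution_forward::primitive_desc& pd,
                        const mkldnn::memory::desc& srcDesc,
                        const mkldnn::memory::desc& dstDesc,
                        const mkldnn::engine& engine) {
        execPrim.reset(new mkldnn::convolution_forward(pd));
        if (srcDesc != pd.src_desc())
            inputReorders.insert({DNNL_ARG_SRC, IntermReorder(srcDesc, pd.src_desc(), engine)});
        if (dstDesc != pd.dst_desc())
            outputReorders.insert({DNNL_ARG_DST, IntermReorder(pd.dst_desc(), dstDesc, engine)});
    }
};

}  // namespace

// At execution time the node only supplies the primitive arguments;
// exec() applies the registered input reorders, runs the primitive,
// then applies the registered output reorders.
void runExampleExecutor(ExampleConvExecutor& executor,
                        const mkldnn::memory& src, const mkldnn::memory& wgh,
                        const mkldnn::memory& dst, mkldnn::stream strm) {
    std::unordered_map<int, mkldnn::memory> args{
        {DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, wgh}, {DNNL_ARG_DST, dst}};
    executor.exec(args, strm);
}
```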

View File

@ -145,6 +145,10 @@ void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNAdaptivePoolingNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) {
auto inputPrec = getParentEdgeAt(0)->getMemory().GetDataType();
auto outputPrec = getChildEdgeAt(0)->getMemory().GetDataType();
@ -283,8 +287,6 @@ bool MKLDNNAdaptivePoolingNode::created() const {
return getType() == AdaptivePooling;
}
void MKLDNNAdaptivePoolingNode::createPrimitive() {}
inline void MKLDNNAdaptivePoolingNode::setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength) {
*(startPtr) = idx * inputLength / outputLength;
*(endPtr) = ceil(static_cast<float>((idx + 1) * inputLength) / outputLength);

View File

@ -18,7 +18,6 @@ public:
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
@ -36,7 +35,7 @@ protected:
bool needShapeInfer() const override;
std::vector<VectorDims> shapeInfer() const override;
bool needPrepareParams() const override { return false; };
void executeDynamicImpl(mkldnn::stream strm) override;
};
} // namespace MKLDNNPlugin

View File

@ -225,6 +225,10 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() {
});
}
void MKLDNNBatchToSpaceNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) {
switch (getParentEdgeAt(0)->getMemory().getDesc().getPrecision().size()) {
case 1: batchToSpaceKernel<PrecisionTrait<Precision::U8>::value_type>(); break;

View File

@ -18,12 +18,11 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override {};
void execute(mkldnn::stream strm) override;
bool created() const override;
bool needPrepareParams() const override { return false; };
void executeDynamicImpl(mkldnn::stream strm) override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

View File

@ -107,14 +107,6 @@ void MKLDNNBroadcastNode::initSupportedPrimitiveDescriptors() {
supportedPrimitiveDescriptors = getSupportedConfigs(this);
}
void MKLDNNBroadcastNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
bool MKLDNNBroadcastNode::needPrepareParams() const {
return needPrepareParamsVar;
}
@ -215,6 +207,14 @@ std::vector<VectorDims> MKLDNNBroadcastNode::shapeInfer() const {
return newOutputShapes;
}
bool MKLDNNBroadcastNode::isExecutable() const {
return !isInputTensorAtPortEmpty(0);
}
void MKLDNNBroadcastNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNBroadcastNode::execute(mkldnn::stream strm) {
if (optimizedCase) {
optimizedExecute(getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(), getChildEdgeAt(0)->getMemoryPtr());

View File

@ -19,13 +19,11 @@ public:
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override;
bool created() const override;
bool isExecutable() const override;
static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
protected:

View File

@ -203,12 +203,8 @@ void MKLDNNBucketizeNode::prepareParams() {
std::accumulate(input_tensor_dims.begin(), input_tensor_dims.end(), size_t(1), std::multiplies<size_t>());
}
bool MKLDNNBucketizeNode::isExecutable() const {
return !isInputTensorAtPortEmpty(0);
}
std::vector<VectorDims> MKLDNNBucketizeNode::shapeInfer() const {

View File

@ -15,15 +15,16 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
void executeDynamicImpl(mkldnn::stream strm) override {
execute(strm);
}
void prepareParams() override;
std::vector<VectorDims> shapeInfer() const override;
bool isExecutable() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
private:

View File

@ -31,6 +31,10 @@ namespace {
constexpr size_t channelAxis = 1lu;
}
bool MKLDNNConcatNode::isExecutable() const {
return !hasEmptyOutputTensors() && !isOptimized();
}
bool MKLDNNConcatNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto concatOp = ngraph::as_type_ptr<const ngraph::op::v0::Concat>(op);
@ -173,7 +177,7 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() {
}
// TODO [DS]: inplace
if (!canBeInPlace || std::any_of(inputShapes.begin(), inputShapes.end(), [](const Shape& shape) { return shape.hasZeroDims(); }))
return;
// Optimized inplace case
@ -353,7 +357,6 @@ void MKLDNNConcatNode::prepareParams() {
IE_THROW() << "Preferable primitive descriptor is not set.";
std::vector<memory::desc> srcs_d;
for (size_t i = 0; i < getParentEdges().size(); i++) {
const auto& srcMemPtr = getParentEdgesAtPort(i)[0]->getMemoryPtr();
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) {
@ -362,6 +365,10 @@ void MKLDNNConcatNode::prepareParams() {
<< getName() << ".";
}
if (srcMemPtr->GetShape().hasZeroDims()) {
continue;
}
auto desc = srcMemPtr->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
const auto& dims = srcMemPtr->getStaticDims();
for (size_t j = 0; j < dims.size(); j++) {
@ -382,14 +389,6 @@ void MKLDNNConcatNode::prepareParams() {
prim.reset(new concat(primitive_desc));
}
void MKLDNNConcatNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
size_t MKLDNNConcatNode::inverseOrder(const SizeVector& order, size_t axis) {
for (size_t i = 0; i < order.size(); i++) {
if (axis == order[i]) {
@ -489,16 +488,23 @@ void MKLDNNConcatNode::execute(mkldnn::stream strm) {
return;
}
if (canOptimizeNspc) {
execNspcSpecCase();
return;
}
const MKLDNNMemory& dst_memory = getChildEdgeAt(0)->getMemory();
const size_t num_src = getParentEdges().size();
std::unordered_map<int, memory> mem_ags {{DNNL_ARG_DST, dst_memory.GetPrimitive()}};
size_t nonZeroInShapes = 0;
for (int i = 0; i < num_src; i++) {
const auto& srcMem = getParentEdgesAtPort(i)[0]->getMemory();
if (srcMem.GetShape().hasZeroDims()) {
continue;
}
mem_ags[DNNL_ARG_MULTIPLE_SRC + nonZeroInShapes] = srcMem.GetPrimitive();
nonZeroInShapes++;
}
(*prim).execute(strm, mem_ags);
}
@ -518,21 +524,32 @@ void MKLDNNConcatNode::execNspcSpecCase() {
std::vector<const uint8_t*> src_ptrs;
std::vector<uint8_t*> dst_ptrs;
size_t nonZeroInShapes = 0;
int firstNonZeroEdge = -1;
for (size_t i = 0; i < num_src; i++) {
const MKLDNNMemory& src_mem = getParentEdgesAtPort(i)[0]->getMemory();
if (src_mem.GetShape().hasZeroDims()) {
continue;
}
const size_t num_channels = src_mem.getStaticDims()[channelAxis];
channelsDataSize.push_back(num_channels * dataSize);
src_ptrs.push_back(reinterpret_cast<const uint8_t*>(src_mem.GetData()));
dst_ptrs.push_back(dst_ptr + channels_size);
channels_size += num_channels * dataSize;
if (firstNonZeroEdge == -1) {
firstNonZeroEdge = i;
}
nonZeroInShapes++;
}
const size_t iter_count = getParentEdgeAt(firstNonZeroEdge)->getMemory().GetSize() / channelsDataSize[0];
parallel_for(iter_count, [&](int i) {
const size_t dst_off = i * channels_size;
for (int j = 0; j < nonZeroInShapes; j++) {
cpu_memcpy(dst_ptrs[j] + dst_off, src_ptrs[j] + i * channelsDataSize[j], channelsDataSize[j]);
}
});

View File

@ -19,7 +19,6 @@ public:
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void initOptimalPrimitiveDescriptor() override;
void createPrimitive() override;
void selectOptimalPrimitiveDescriptor() override;
bool created() const override;
void execute(mkldnn::stream strm) override;
@ -28,10 +27,8 @@ public:
bool isOptimized() const;
InferenceEngine::Precision getRuntimePrecision() const override;
bool isExecutable() const override;
bool needPrepareParams() const override;
void prepareParams() override;

View File

@ -330,48 +330,42 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
}
}
void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false) {
mkldnn::post_ops ops;
bool useLegacyPostOps = true; // @todo remove after issue with performance of binary post ops fixed
auto getBinPostOpShape = [&](){
const auto outShape = getOutputShapeAtPort(0).getStaticDims();
const auto outShapeRank = getOutputShapeAtPort(0).getRank();
const auto chIdx = getFusingAxis();
std::vector<size_t> binaryShape(outShapeRank, 1);
binaryShape[chIdx] = outShape[chIdx];
return binaryShape;
};
for (auto &node : fusedWith) {
if (node->getType() == Split || node->getType() == Concatenation)
continue;
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
if (eltwiseNode->isSpecialConvolutionAddFusing()) {
ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
} else {
if (useLegacyPostOps || eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, dims, align);
} else {
eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
}
}
continue;
}
if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
if (useLegacyPostOps) {
fakeQuantizeNode->appendPostOps(ops, dims);
} else {
fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
}
continue;
}
@ -416,7 +410,6 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
// attr[1] - binary
mkldnn::primitive_attr attrs[1];
setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims());
// setPostOps(attrs[1], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), false, true);
bool containJitImpl = false;
@ -494,15 +487,6 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNConvolutionNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
bool MKLDNNConvolutionNode::created() const {
return getType() == Convolution;
}
@ -552,7 +536,14 @@ MKLDNNConvolutionNode::createDescriptorInternal(const mkldnn::memory::desc& inpu
void MKLDNNConvolutionNode::createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
const std::vector<MemoryDescPtr>& outputDesc) {
MemoryDescPtr inpDesc;
if (inputDesc[0]->isDefined()) {
inpDesc = inputDesc[0];
} else {
auto dummyInDims = MemoryDescUtils::makeDummyShape(inputDesc[0]->getShape()).getStaticDims();
dummyInDims[1] = IC;
inpDesc = inputDesc[0]->cloneWithNewDims(dummyInDims);
}
DnnlMemoryDescPtr definedInpMemDesc = MemoryDescUtils::convertToDnnlMemoryDesc(inpDesc);
DnnlMemoryDescPtr definedOutMemDesc;
@ -630,7 +621,6 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) {
// attr[1] - binary
mkldnn::primitive_attr attrs[1];
setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims());
// setPostOps(attrs[1], false, true);
auto rightConfig = selectedPD->getConfig();
size_t selected_count = 0;
@ -914,25 +904,63 @@ InferenceEngine::Blob::Ptr MKLDNNConvolutionNode::createInternalBlob(InferenceEn
return internalBlob;
}
std::shared_ptr<MKLDNNDescriptor> MKLDNNConvolutionNode::createMkldnnConvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc,
const mkldnn::memory::desc& biasDesc) {
std::shared_ptr<mkldnn::convolution_forward::desc> dnnlConvDesc;
auto alg = isWinograd() ? mkldnn::algorithm::convolution_winograd : mkldnn::algorithm::convolution_direct;
if (withBiases) {
// WA to align IR bias representation (3 to 5 rank tensors) to oneDNN representation (1 rank tensor)
mkldnn::memory::desc dnnlBiasDesc = biasDesc.reshape(MKLDNNExtensionUtils::convertToDnnlDims(biasesDims));
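// e.g. an IR bias with dims {1, OC, 1, 1} is reshaped to the rank-1 {OC} layout expected by oneDNN (illustrative shapes)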
return std::make_shared<MKLDNNDescriptor>(createDescriptorInternal(srcDesc,
wghDesc,
dnnlBiasDesc,
dstDesc,
alg));
} else {
return std::make_shared<MKLDNNDescriptor>(createDescriptorInternal(srcDesc,
wghDesc,
dstDesc,
alg));
}
}
void MKLDNNConvolutionNode::prepareParams() {
auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr();
auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
IE_THROW() << "Destination memory didn't allocate.";
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
IE_THROW() << "Input memory didn't allocate.";
if (!wghMemPtr || !wghMemPtr->GetPrimitivePtr())
IE_THROW() << "Weight memory didn't allocate.";
MKLDNNMemoryPtr biasMemPtr = nullptr;
if (withBiases) {
biasMemPtr = getParentEdgesAtPort(2)[0]->getMemoryPtr();
if (!biasMemPtr || !biasMemPtr->GetPrimitivePtr())
IE_THROW() << "Input memory didn't allocate.";
}
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << ".";
auto inMemoryDesc = srcMemPtr->GetDescWithType<DnnlMemoryDesc>();
auto weightMemoryDesc = wghMemPtr->GetDescWithType<DnnlMemoryDesc>();
auto outMemoryDesc = dstMemPtr->GetDescWithType<DnnlMemoryDesc>();
mkldnn::memory::desc biasDesc;
if (biasMemPtr) {
biasDesc = biasMemPtr->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
}
auto initPrimitiveAttr = [&]() {
mkldnn::primitive_attr attr;
addZeroPoints(attr);
setPostOps(attr, outMemoryDesc->getShape().getStaticDims(), true);
return std::make_shared<mkldnn::primitive_attr>(std::move(attr));
};
@ -947,61 +975,95 @@ void MKLDNNConvolutionNode::prepareParams() {
pAttrLocal = initPrimitiveAttr();
}
std::shared_ptr<MKLDNNDescriptor> desc = createMkldnnConvDesc(inMemoryDesc->getDnnlDesc(),
weightMemoryDesc->getDnnlDesc(),
outMemoryDesc->getDnnlDesc(),
biasDesc);
auto itpd = desc->createPrimitiveDescriptorIterator(getEngine(), *pAttrLocal);
convolution_forward::primitive_desc prim_desc;
execPtr = nullptr;
while (static_cast<bool>(itpd)) {
impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());
if (impl_type == selected_pd->getImplementationType()) {
prim_desc = convolution_forward::primitive_desc(itpd.get());
execPtr = std::make_shared<ConvolutionExecutor>(prim_desc,
srcMemPtr->GetPrimitive().get_desc(),
wghMemPtr->GetPrimitive().get_desc(),
dstMemPtr->GetPrimitive().get_desc(),
getEngine());
break;
}
if (!itpd.next_impl()) {
auto inDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(srcMemPtr->getStaticDims()),
srcMemPtr->GetDataType(),
memory::format_tag::any);
auto wghDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(wghMemPtr->getStaticDims()),
wghMemPtr->GetDataType(),
memory::format_tag::any);
auto outDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dstMemPtr->getStaticDims()),
dstMemPtr->GetDataType(),
memory::format_tag::any);
std::shared_ptr<MKLDNNDescriptor> reorderConvDesc = createMkldnnConvDesc(inDesc, wghDesc, outDesc, biasDesc);
auto reordItpd = reorderConvDesc->createPrimitiveDescriptorIterator(getEngine(), *pAttrLocal);
if (static_cast<bool>(reordItpd)) {
auto prim_desc = convolution_forward::primitive_desc(reordItpd.get());
execPtr = std::make_shared<ConvolutionExecutor>(prim_desc, srcMemPtr->GetPrimitive().get_desc(),
wghMemPtr->GetPrimitive().get_desc(),
dstMemPtr->GetPrimitive().get_desc(),
getEngine());
break;
}
}
}
if (execPtr) {
primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive();
primArgs[DNNL_ARG_WEIGHTS] = wghMemPtr->GetPrimitive();
primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive();
if (withBiases) {
primArgs[DNNL_ARG_BIAS] = biasMemPtr->GetPrimitive();
}
MKLDNNNode::appendPostOpArgs(*pAttrLocal, primArgs, binaryPostOpsArgs);
} else {
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
}
// todo: [AV] uncomment to use binary mechanism
// auto post_ops = attr.get_post_ops();
// int idx = 0;
// for (int i = 0; i < post_ops.len(); i++) {
// if (post_ops.kind(i) == mkldnn::primitive::kind::binary) {
// primArgs.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, binaryPostOpsArgs[idx++]});
// }
// }
}
MKLDNNConvolutionNode::ConvolutionExecutor::ConvolutionExecutor(const mkldnn::convolution_forward::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
const mkldnn::engine& engine) {
execPrim.reset(new mkldnn::convolution_forward(pd));
if (inMemDesc != pd.src_desc()) {
inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, pd.src_desc(), engine)});
}
if (weightMemDesc != pd.weights_desc()) {
inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, pd.weights_desc(), engine)});
}
if (outMemDesc != pd.dst_desc()) {
outputReorders.insert({DNNL_ARG_DST, IntermReorder(pd.dst_desc(), outMemDesc, engine)});
}
}
void MKLDNNConvolutionNode::execute(mkldnn::stream strm) {
if (!execPtr) {
IE_THROW() << "Can't execute Convolution node with name: " << getName() << ", because executor is not compiled";
}
execPtr->exec(primArgs, strm);
}
void MKLDNNConvolutionNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}

View File

@ -9,6 +9,7 @@
#include <memory>
#include <string>
#include <vector>
#include "common/dnnl_executor.h"
namespace MKLDNNPlugin {
@ -23,7 +24,6 @@ public:
void createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
const std::vector<MemoryDescPtr>& outputDesc) override;
void initDescriptor(const NodeConfig& config) override;
void createPrimitive() override;
void selectOptimalPrimitiveDescriptor() override;
void initSupportedPrimitiveDescriptors() override;
void filterSupportedPrimitiveDescriptors() override;
@ -65,11 +65,29 @@ protected:
InferenceEngine::Precision fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const;
private:
using executorPtr = std::shared_ptr<DnnlExecutor>;
executorPtr execPtr = nullptr;
class ConvolutionExecutor : public DnnlExecutor {
public:
ConvolutionExecutor(const mkldnn::convolution_forward::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
const mkldnn::engine& engine);
};
std::shared_ptr<MKLDNNDescriptor> createMkldnnConvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc,
const mkldnn::memory::desc& biasDesc);
void prepareParams() override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override;
void addZeroPoints(mkldnn::primitive_attr& attr) const;
void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights);
void filterSupportedDescriptors();
bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const;
bool isNspcAvailable() const;
@ -122,4 +140,3 @@ private:
};
} // namespace MKLDNNPlugin

View File

@ -7,7 +7,8 @@
#include "common/cpu_convert.h"
#include "common/blocked_desc_creator.h"
#include <ngraph/opsets/opset1.hpp>
#include "utils/ngraph_utils.hpp"
#include <ie_ngraph_utils.hpp>
#include <utils/ngraph_utils.hpp>
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -26,14 +27,17 @@ bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr<const ngraph:
return true;
}
MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(op, eng, cache) {
std::string errorMessage;
if (isSupportedOperation(op, errorMessage)) {
errorPrefix = "Convert node with name '" + getName() + "'";
} else {
IE_THROW(NotImplemented) << errorMessage;
}
auto convert = ov::as_type_ptr<const ngraph::opset1::Convert>(op);
origPrc = details::convertPrecision(convert->get_destination_type());
}
std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {
@ -42,7 +46,8 @@ std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {
MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode("Convert", nodeName, eng, cache) {
: MKLDNNNode("Convert", nodeName, eng, cache)
, origPrc(outPrc) {
inputShapes.push_back(shape);
addOriginalInputPrecision(inPrc);
outputShapes.push_back(shape);
@ -124,15 +129,8 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNConvertNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNConvertNode::execute(mkldnn::stream strm) {
@ -147,7 +145,13 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) {
void* srcPtr = parentMem.GetPtr();
void* dstPtr = childMem.GetPtr();
cpu_convert(srcPtr,
dstPtr,
parentMem.getDesc().getPrecision(),
origPrc,
childMem.getDesc().getPrecision(),
parentPaddElemCount);
}
bool MKLDNNConvertNode::created() const {

View File

@ -19,9 +19,8 @@ public:
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override;
bool created() const override;
bool canBeInPlace() const override {
return false;
@ -49,6 +48,7 @@ public:
private:
MemoryDescPtr input;
MemoryDescPtr output;
InferenceEngine::Precision origPrc;
std::string errorPrefix;
};

View File

@ -165,14 +165,8 @@ bool MKLDNNCTCGreedyDecoderNode::created() const {
return getType() == CTCGreedyDecoder;
}
void MKLDNNCTCGreedyDecoderNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNCTCGreedyDecoderNode::needPrepareParams() const {

View File

@ -15,7 +15,6 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
void executeDynamicImpl(dnnl::stream strm) override;

View File

@ -168,14 +168,8 @@ bool MKLDNNCTCGreedyDecoderSeqLenNode::created() const {
return getType() == CTCGreedyDecoderSeqLen;
}
void MKLDNNCTCGreedyDecoderSeqLenNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNCTCGreedyDecoderSeqLenNode::needPrepareParams() const {

View File

@ -15,7 +15,6 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
void executeDynamicImpl(dnnl::stream strm) override;

View File

@ -57,12 +57,8 @@ void MKLDNNCTCLossNode::initSupportedPrimitiveDescriptors() {
impl_desc_type::ref_any);
}
void MKLDNNCTCLossNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNCTCLossNode::execute(mkldnn::stream strm) {

View File

@ -15,13 +15,12 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
void executeDynamicImpl(mkldnn::stream strm) override;
bool needPrepareParams() const override { return false; };
private:

View File

@ -12,6 +12,7 @@
#include "ie_precision.hpp"
#include <ie_ngraph_utils.hpp>
#include "mkldnn_cum_sum_node.h"
#include "utils/bfloat16.hpp"
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
@ -70,8 +71,7 @@ void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() {
return;
dataPrecision = getOriginalInputPrecisionAtPort(CUM_SUM_DATA);
if (!one_of(dataPrecision, Precision::I8, Precision::U8, Precision::I16, Precision::BF16, Precision::I32, Precision::FP32, Precision::I64, Precision::U64))
IE_THROW() << errorPrefix << " has unsupported 'data' input precision: " << dataPrecision.name();
if (inputShapes.size() == numOfInputs) {
@ -95,43 +95,17 @@ void MKLDNNCumSumNode::execute(mkldnn::stream strm) {
if (inputShapes.size() == numOfInputs)
axis = getAxis(getParentEdgeAt(AXIS)->getMemory(), getParentEdgeAt(CUM_SUM_DATA)->getMemory());
OV_SWITCH(MKLDNNPlugin, CumSumExecute, this, dataPrecision,
OV_CASE(Precision::I8, int8_t),
OV_CASE(Precision::U8, uint8_t),
OV_CASE(Precision::I16, int16_t),
OV_CASE(Precision::BF16, bfloat16_t),
OV_CASE(Precision::I32, int32_t),
OV_CASE(Precision::FP32, float),
OV_CASE(Precision::I64, int64_t),
OV_CASE(Precision::U64, uint64_t))
}
template <typename dataType>
void MKLDNNCumSumNode::exec() {
const auto *input = reinterpret_cast<const dataType *>(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr());
@ -284,13 +258,7 @@ bool MKLDNNCumSumNode::needPrepareParams() const {
}
void MKLDNNCumSumNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
REG_MKLDNN_PRIM_FOR(MKLDNNCumSumNode, CumSum)

View File

@ -15,7 +15,6 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
@ -47,6 +46,13 @@ private:
InferenceEngine::Precision dataPrecision;
std::string errorPrefix;
template<typename T>
struct CumSumExecute {
void operator()(MKLDNNCumSumNode* node) {
node->exec<T>();
}
};
};
} // namespace MKLDNNPlugin

View File

@ -13,34 +13,38 @@
#include <mkldnn_extension_utils.h>
#include "ie_parallel.hpp"
#include "utils/general_utils.h"
#include <cpu/x64/cpu_isa_traits.hpp>
#include <nodes/common/cpu_memcpy.h>
#include <memory_desc/cpu_memory_desc_utils.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <utils/shape_inference/static_shape.hpp>
#include <utils/shape_inference/shape_inference.hpp>
#include <ie_ngraph_utils.hpp>
#include "convolution_shape_inference.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (std::dynamic_pointer_cast<const ngraph::opset1::ConvolutionBackpropData>(op) == nullptr &&
std::dynamic_pointer_cast<const ngraph::opset1::GroupConvolutionBackpropData>(op) == nullptr) {
errorMessage = "Only opset1 ConvolutionBackpropData and GroupConvolutionBackpropData operations are supported";
return false;
}
size_t ndims = op->get_input_partial_shape(0).rank().get_length();
if ((ndims < 3) || (ndims > 5)) {
errorMessage = "Only 3D, 4D and 5D blobs are supported as input";
return false;
}
if (op->get_input_partial_shape(1).is_dynamic() || (op->get_input_size() > 2 && op->get_input_partial_shape(2).is_dynamic())) {
errorMessage = "Doesn't support dynamic shapes for 'weights' and 'output_shape' inputs";
return false;
}
} catch (...) {
return false;
}
@ -58,15 +62,14 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
auto convBackprop = std::dynamic_pointer_cast<const ngraph::opset1::ConvolutionBackpropData>(op);
auto groupConvBackprop = std::dynamic_pointer_cast<const ngraph::opset1::GroupConvolutionBackpropData>(op);
const auto& weightDims = getWeightDims();
if (convBackprop) {
algorithm = DeconvolutionCommon;
IC = weightDims[0];
OC = weightDims[1];
groupNum = 1;
withGroups = false;
@ -78,10 +81,17 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
}
paddingL = convBackprop->get_pads_begin();
paddingR = convBackprop->get_pads_end();
outputPadding = convBackprop->get_output_padding();
autoPad = one_of(convBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER);
} else if (groupConvBackprop) {
algorithm = DeconvolutionGrouped;
groupNum = weightDims[0];
IC = groupNum * weightDims[1];
OC = groupNum * weightDims[2];
withGroups = groupNum > 1;
isDW = withGroups && groupNum == OC && groupNum == IC;
@ -93,10 +103,26 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
}
paddingL = groupConvBackprop->get_pads_begin();
paddingR = groupConvBackprop->get_pads_end();
outputPadding = groupConvBackprop->get_output_padding();
autoPad = one_of(groupConvBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER);
}
for (int i = 0; i < dilation.size(); i++) {
kernel.push_back(weightDims[withGroups + 2 + i]);
}
externOutShape = inputShapes.size() == 3;
if (externOutShape && isDynamicNode()) {
bool isConstOutShape = ngraph::is_type<ov::op::v0::Constant>(op->get_input_node_shared_ptr(2));
if (isConstOutShape) {
lastOutputSpatialDims = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(2))->cast_vector<int32_t>();
}
const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2;
if (getInputShapeAtPort(2).getStaticDims()[0] != spDimsNum || (isConstOutShape && lastOutputSpatialDims.size() != spDimsNum)) {
IE_THROW() << "'output_shape' input has incorrect number of elements. Expected = " << spDimsNum;
}
}
} else {
IE_THROW(NotImplemented) << errorMessage;
}
@ -113,14 +139,6 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE
auto const blbSize = blb->GetSize();
// WA: In int8 case, we are processing weights using internal blob.
// So we disconnect constant node containing weights from the graph and then don't use it.
InferenceEngine::SizeVector dimsForBlockedDesc{dims};
std::swap(dimsForBlockedDesc[withGroups + 0], dimsForBlockedDesc[withGroups + 1]);
@ -157,19 +175,19 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const {
return false;
}
// todo: [antonvor] added these checks to fix performance problems
if (kernel.size() == 3)
return false;
if (!withGroups && stride.back() > 3)
return false;
if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) {
const auto& inMaxDims = getOutputShapeAtPort(0).getMaxDims();
if (std::any_of(inMaxDims.begin(), inMaxDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; })) {
return false;
}
// heuristicConst = 2^26
// heuristicParam = IC^2 * SP
auto heuristicConst = 67108864;
auto heuristicParam = IC * IC;
for (int i = 2; i < inMaxDims.size(); i++)
heuristicParam *= inMaxDims[i];
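// e.g. IC = 256 with a 64x64 max spatial size gives 256 * 256 * 64 * 64 = 2^28 > 2^26, so the int8 path is rejected (illustrative numbers)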
if (heuristicParam > heuristicConst)
return false;
}
@ -206,10 +224,65 @@ bool MKLDNNDeconvolutionNode::canFuse(const MKLDNNNodePtr& node) const {
return (fusedWith.empty() && node->canBePerformedAsScaleShift(this));
}
void MKLDNNDeconvolutionNode::initPadding(std::shared_ptr<ngraph::Node> op, const Shape &inDims, const std::vector<int32_t>& outSpDims) {
std::vector<ov::StaticShape> input_shapes{inDims.getStaticDims(), getWeightDims()};
ov::StaticShape output_shape_input;
if (externOutShape) {
IE_ASSERT(outSpDims.size() == getInputShapeAtPort(2).getStaticDims()[0]);
input_shapes.push_back({outSpDims.size()});
for (size_t i = 0; i < outSpDims.size(); i++) {
output_shape_input.push_back(outSpDims[i]);
}
}
if (getAlgorithm() == DeconvolutionCommon) {
auto deconv = ngraph::as_type_ptr<ngraph::op::v1::ConvolutionBackpropData>(op);
IE_ASSERT(ov::op::v1::resolve_auto_pad_for_shape_back_prop(deconv.get(), paddingL, paddingR, input_shapes, output_shape_input, 2, 2));
} else if (getAlgorithm() == DeconvolutionGrouped) {
auto deconv = ngraph::as_type_ptr<ngraph::op::v1::GroupConvolutionBackpropData>(op);
IE_ASSERT(ov::op::v1::resolve_auto_pad_for_shape_back_prop(deconv.get(), paddingL, paddingR, input_shapes, output_shape_input, 2, 3));
}
}
std::pair<VectorDims, VectorDims> MKLDNNDeconvolutionNode::makeDummyInOutShape() {
auto inShape = MemoryDescUtils::makeDummyShape(getInputShapeAtPort(0));
auto outShape = getOutputShapeAtPort(0);
if (isDynamicNode()) {
if (externOutShape) {
if (lastOutputSpatialDims.empty()) {
const auto& shape = getOutputShapeAtPort(0);
lastOutputSpatialDims.resize(shape.getRank() - 2);
const auto& minDims = shape.getMinDims();
const auto& maxDims = shape.getMaxDims();
const auto& dims = shape.getDims();
for (size_t i = 0; i < dims.size() - 2; ++i) {
lastOutputSpatialDims[i] = dims[i + 2] == Shape::UNDEFINED_DIM ? std::min(maxDims[i + 2],
std::max(minDims[i + 2], static_cast<Dim>(64))) : dims[i + 2];
}
}
ov::CoordinateDiff pb = autoPad ? ov::CoordinateDiff(paddingL.size(), 0) : paddingL;
ov::CoordinateDiff pe = autoPad ? ov::CoordinateDiff(paddingR.size(), 0) : paddingR;
auto inputDims = inShape.getStaticDims();
const auto& weightDims = getWeightDims();
const size_t wghOffset = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
for (size_t i = 0; i < inputDims.size() - 2; i++) {
inputDims[2 + i] = ((lastOutputSpatialDims[i] - (dilation[i] + 1) *
(weightDims[wghOffset + 2 + i] - 1) - 1 + pb[i] + pe[i] - outputPadding[i])) /
stride[i] + 1;
}
inShape = Shape(inputDims);
}
initPadding(opToShapeInfer, inShape, lastOutputSpatialDims);
outShape = Shape(shapeInferInternal(inShape.getStaticDims(), lastOutputSpatialDims));
}
return {inShape.getStaticDims(), outShape.getStaticDims()};
}
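A minimal sketch of the per-axis back-calculation used above to derive a dummy input dim from a requested output dim; the concrete kernel/stride/pad values below are illustrative assumptions:

#include <cstdint>
#include <iostream>

// Deconvolution forward relation per spatial axis (with dilation d in the usual convention):
//   out = (in - 1) * stride - padL - padR + d * (k - 1) + 1 + output_padding
// The (dilation[i] + 1) term above suggests the node stores dilation as (d - 1).
// Solving for `in` gives the expression used in makeDummyInOutShape().
static int64_t backCalcInputDim(int64_t out, int64_t k, int64_t stride, int64_t dilation /* stored as d - 1 */,
                                int64_t padL, int64_t padR, int64_t outPad) {
    return (out - (dilation + 1) * (k - 1) - 1 + padL + padR - outPad) / stride + 1;
}

int main() {
    // Requested output 128, kernel 4, stride 2, no extra dilation, pads 1/1, no output padding.
    const int64_t in = backCalcInputDim(128, 4, 2, 0, 1, 1, 0); // (128 - 3 - 1 + 2) / 2 + 1 = 64
    std::cout << in << std::endl;
    // Forward check: (64 - 1) * 2 - 1 - 1 + 1 * (4 - 1) + 1 + 0 = 128
    std::cout << (in - 1) * 2 - 1 - 1 + 1 * (4 - 1) + 1 + 0 << std::endl;
    return 0;
}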
void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
isInt8 = canBeExecutedInInt8();
InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0);
@ -239,21 +312,17 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
if (getChildEdges().empty())
IE_THROW() << errorPrefix << " has incorrect number of output edges";
for (int i = 0; i < paddingR.size(); i++) {
int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
int krn = weightDims[with_group + 2 + i];
int src = getOutputShapeAtPort(0).getStaticDims()[2 + i];
int dst = getInputShapeAtPort(0).getStaticDims()[2 + i];
krn = (krn - 1)*(dilation[i] + 1) + 1;
int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
paddingR[i] = (dst - calc_dst) * stride[i];
}
VectorDims inDims, outDims;
std::tie(inDims, outDims) = makeDummyInOutShape();
inShape = Shape(inDims);
Shape outShape(outDims);
initPaddingR(inShape, outShape);
if (isInt8) {
int8WeightDims = getWeightDims();
// WA: if int8 deconvolution is supported, we create internal weights blob in IO format
std::swap(weightDims[withGroups + 0], weightDims[withGroups + 1]);
internalBlobs.push_back(createWeiBlobAsIO(weightDims));
std::swap(int8WeightDims[withGroups + 0], int8WeightDims[withGroups + 1]);
internalBlobs.push_back(createWeiBlobAsIO(int8WeightDims));
auto format = getInputShapeAtPort(0).getRank() == 5 ? dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc;
MemoryDescPtr in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getInputShapeAtPort(0), inputDataType, format);
MemoryDescPtr out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getOutputShapeAtPort(0), outputDataType, format);
@ -265,23 +334,44 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
createDescriptor({in_candidate}, {out_candidate});
}
}
setPostOps(attr);
setPostOps(attr, outShape.getStaticDims());
}
void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
void MKLDNNDeconvolutionNode::initPaddingR(const Shape &inShape, const Shape &outShape) {
for (int i = 0; i < paddingR.size(); i++) {
int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
const auto& weightDims = getWeightDims();
int krn = weightDims[with_group + 2 + i];
int src = outShape.getStaticDims()[2 + i];
int dst = inShape.getStaticDims()[2 + i];
krn = (krn - 1)*(dilation[i] + 1) + 1;
int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
paddingR[i] = (dst - calc_dst) * stride[i];
}
}
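As a rough self-contained illustration of the paddingR correction above (the numbers are made up for the example): the deconvolution is treated as the backward-data pass of a convolution whose source is the deconv output and whose destination is the deconv input, and paddingR absorbs whatever the plain convolution formula does not cover.

#include <iostream>

// Mirrors the body of initPaddingR() for a single spatial axis.
static int recomputePaddingR(int kernel, int dilation /* stored as d - 1 */, int stride,
                             int paddingL, int srcSpatial /* deconv output */, int dstSpatial /* deconv input */) {
    const int krn = (kernel - 1) * (dilation + 1) + 1;               // effective (dilated) kernel size
    const int calc_dst = (srcSpatial - krn + paddingL) / stride + 1; // dst implied by paddingR == 0
    return (dstSpatial - calc_dst) * stride;                         // correction needed to land exactly on dst
}

int main() {
    // Illustrative values only: kernel 3, stride 2, padL 1, deconv output 63, deconv input 32.
    std::cout << recomputePaddingR(3, 0, 2, 1, 63, 32) << std::endl; // (32 - 31) * 2 = 2
    return 0;
}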
void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims) {
mkldnn::post_ops ops;
auto getBinPostOpShape = [&](){
const auto outShapeRank = getOutputShapeAtPort(0).getRank();
const auto chIdx = getFusingAxis();
std::vector<size_t> binaryShape(outShapeRank, 1);
binaryShape[chIdx] = dims[chIdx];
return binaryShape;
};
for (auto &node : fusedWith) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
// TODO [DS]: change to shape from memory
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
// use legacy depthwise since backprop convolution does not support binary post ops
eltwiseNode->appendPostOps(ops, dims, align);
continue;
}
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops);
if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
continue;
}
IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented";
@ -334,78 +424,300 @@ bool MKLDNNDeconvolutionNode::created() const {
return getType() == Deconvolution;
}
void MKLDNNDeconvolutionNode::createPrimitive() {
if (prim)
return;
bool MKLDNNDeconvolutionNode::needShapeInfer() const {
if (inputShapesModified()) {
return true;
}
if (externOutShape) {
if (lastOutputSpatialDims != readOutputSpatialDims()) {
return true;
}
}
if (isInt8) {
auto prim_desc = createPrimitiveDescriptor<deconvolution_forward::primitive_desc,
deconvolution_forward::desc>(attr);
return false;
}
prim.reset(new deconvolution_forward(prim_desc));
std::vector<VectorDims> MKLDNNDeconvolutionNode::shapeInfer() const {
const auto &dataMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
std::vector<int32_t> outSpDims;
if (externOutShape) {
outSpDims = readOutputSpatialDims();
}
return {shapeInferInternal(dataMemPtr->getStaticDims(), outSpDims)};
}
auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, internalBlobMemory[0]->GetPrimitive()}, {DNNL_ARG_DST, dst}};
} else {
auto prim_desc = createPrimitiveDescriptor<convolution_backward_data::primitive_desc,
convolution_backward_data::desc, convolution_forward::primitive_desc>(attr);
VectorDims MKLDNNDeconvolutionNode::shapeInferInternal(const VectorDims &inDims, std::vector<int32_t> outSpDims) const {
std::vector<ov::StaticShape> inputShapes = {
inDims,
getWeightDims()
};
prim.reset(new convolution_backward_data(prim_desc));
std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>> inputValues;
auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
auto weights = getParentEdgeAt(1)->getMemory().GetPrimitive();
auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
primArgs = {{DNNL_ARG_DIFF_DST, src}, {DNNL_ARG_WEIGHTS, weights}, {DNNL_ARG_DIFF_SRC, dst}};
if (externOutShape) {
if (outSpDims.size() != getInputShapeAtPort(2).getStaticDims()[0]) {
IE_THROW() << "Can't compute output shape for node with name: " << getName()
<< ", because the node has 'output_shape' input, but provided output spatial dims number is incorrect";
}
inputShapes.push_back({outSpDims.size()});
inputValues.insert({2, std::make_shared<ngraph::runtime::HostTensor>(ngraph::element::Type_t::i32,
inputShapes.back().to_shape(),
outSpDims.data())});
}
std::vector<ov::StaticShape> outputShapes(1);
shape_inference(opToShapeInfer.get(), inputShapes, outputShapes, inputValues);
return outputShapes.back().to_shape();
}
void MKLDNNDeconvolutionNode::execute(mkldnn::stream strm) {
if (!execPtr) {
IE_THROW() << "Can't execute Deconvolution node with name: " << getName() << ", because executor is not compiled";
}
execPtr->exec(primArgs, strm);
if (externOutShape) {
lastOutputSpatialDims = readOutputSpatialDims();
}
}
void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
const std::vector<MemoryDescPtr> &outputDesc) {
const auto in_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inputDesc[0]);
const auto out_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*outputDesc[0]);
std::shared_ptr<MKLDNNDescriptor> MKLDNNDeconvolutionNode::createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc,
bool isWinograd) const {
mkldnn::algorithm alg = isWinograd ? mkldnn::algorithm::convolution_winograd : mkldnn::algorithm::convolution_direct;
std::shared_ptr<convolution_backward_data::desc> deconv_desc;
std::shared_ptr<convolution_forward::primitive_desc> fwd_conv_pd;
std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(srcDesc, wghDesc, dstDesc, alg);
if (fwd_conv_pd->get(true) == nullptr) {
IE_THROW() << "Forward convolution primitive descriptor is nullable for node with name: " << getName();
}
return std::make_shared<MKLDNNDescriptor>(deconv_desc, fwd_conv_pd);
}
// grouping and autoblocking are not compatible
if ((withGroups && !isDW) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
return;
std::shared_ptr<MKLDNNDescriptor> MKLDNNDeconvolutionNode::createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc) const {
return std::make_shared<MKLDNNDescriptor>(createDescriptorInternalInt8(srcDesc, wghDesc, dstDesc));
}
void MKLDNNDeconvolutionNode::createDeconvPrim(std::shared_ptr<MKLDNNDescriptor> desc,
MKLDNNMemoryPtr srcMemPtr,
MKLDNNMemoryPtr wghMemPtr,
MKLDNNMemoryPtr dstMemPtr,
AttrPtr attr,
impl_desc_type selectedImpl) {
auto itpd = desc->createPrimitiveDescriptorIterator(getEngine(), *attr);
while (static_cast<bool>(itpd)) {
impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());
if (impl_type == selectedImpl) {
if (isInt8) {
if (internalBlobMemory.empty()) {
prepareMemory(itpd);
}
auto prim_desc = deconvolution_forward::primitive_desc(itpd.get());
execPtr = std::make_shared<DeconvExecutorInt8>(prim_desc,
srcMemPtr->GetPrimitive().get_desc(),
internalBlobMemory.front()->GetPrimitive().get_desc(),
dstMemPtr->GetPrimitive().get_desc(),
getEngine());
} else {
auto prim_desc = convolution_backward_data::primitive_desc(itpd.get());
execPtr = std::make_shared<DeconvExecutorDefault>(prim_desc,
srcMemPtr->GetPrimitive().get_desc(),
wghMemPtr->GetPrimitive().get_desc(),
dstMemPtr->GetPrimitive().get_desc(),
getEngine());
}
return;
}
if (!itpd.next_impl()) {
auto inDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(srcMemPtr->getStaticDims()),
memory::data_type::f32,
memory::format_tag::any);
auto wghDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(wghMemPtr->getStaticDims()),
memory::data_type::f32,
memory::format_tag::any);
auto outDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dstMemPtr->getStaticDims()),
memory::data_type::f32,
memory::format_tag::any);
std::shared_ptr<MKLDNNDescriptor> anyDeconvDesc = createDefaultMkldnnDeconvDesc(inDesc, wghDesc, outDesc, false);
auto anyDeconvItpd = anyDeconvDesc->createPrimitiveDescriptorIterator(getEngine(), *attr);
if (static_cast<bool>(anyDeconvItpd)) {
auto prim_desc = convolution_backward_data::primitive_desc(anyDeconvItpd.get());
execPtr = std::make_shared<DeconvExecutorDefault>(prim_desc,
srcMemPtr->GetPrimitive().get_desc(),
wghMemPtr->GetPrimitive().get_desc(),
dstMemPtr->GetPrimitive().get_desc(),
getEngine());
return;
}
}
}
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
}
void MKLDNNDeconvolutionNode::prepareParams() {
auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr();
auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
IE_THROW() << "Destination memory didn't allocate.";
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
IE_THROW() << "Input memory didn't allocate.";
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
if (!wghMemPtr || !wghMemPtr->GetPrimitivePtr())
IE_THROW() << "Weight memory didn't allocate.";
if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << ".";
auto inMemoryDesc = getParentEdgesAtPort(0).front()->getMemory().GetDescWithType<DnnlMemoryDesc>();
auto outMemoryDesc = getChildEdgesAtPort(0).front()->getMemory().GetDescWithType<DnnlMemoryDesc>();
auto initPrimitiveAttr = [&]() {
mkldnn::primitive_attr attr;
setPostOps(attr, dstMemPtr->getStaticDims());
return std::make_shared<mkldnn::primitive_attr>(std::move(attr));
};
AttrPtr pAttrLocal;
if (isDynamicNode()) {
if (!pAttr) {
pAttr = initPrimitiveAttr();
}
pAttrLocal = pAttr;
if (autoPad || externOutShape) {
initPadding(opToShapeInfer, inMemoryDesc->getShape(), externOutShape ? readOutputSpatialDims() : std::vector<int32_t>{});
}
initPaddingR(inMemoryDesc->getShape(), outMemoryDesc->getShape());
} else {
pAttrLocal = initPrimitiveAttr();
}
const auto in_candidate = inMemoryDesc->getDnnlDesc();
const auto out_candidate = outMemoryDesc->getDnnlDesc();
mkldnn::memory::desc wgh_candidate;
if (isInt8) {
if (internalBlobMemory.empty()) {
wgh_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
} else {
wgh_candidate = internalBlobMemory.front()->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
}
} else {
wgh_candidate = getParentEdgesAtPort(1).front()->getMemory().GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
}
std::shared_ptr<MKLDNNDescriptor> desc;
if (isInt8) {
desc = createInt8MkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate);
} else {
desc = createDefaultMkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate,
selected_pd->getImplementationType() == MKLDNNPlugin::impl_desc_type::jit_avx512_winograd);
}
createDeconvPrim(desc, srcMemPtr, wghMemPtr, dstMemPtr, pAttrLocal, selected_pd->getImplementationType());
if (std::dynamic_pointer_cast<DeconvExecutorInt8>(execPtr)) {
primArgs = {{DNNL_ARG_SRC, srcMemPtr->GetPrimitive()},
{DNNL_ARG_WEIGHTS, internalBlobMemory.front()->GetPrimitive()},
{DNNL_ARG_DST, dstMemPtr->GetPrimitive()}};
} else {
primArgs = {{DNNL_ARG_DIFF_DST, srcMemPtr->GetPrimitive()},
{DNNL_ARG_WEIGHTS, wghMemPtr->GetPrimitive()},
{DNNL_ARG_DIFF_SRC, dstMemPtr->GetPrimitive()}};
}
MKLDNNNode::appendPostOpArgs(attr, primArgs, binaryPostOpsArgs);
}
void MKLDNNDeconvolutionNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
MKLDNNDeconvolutionNode::DefaultDeconvDescs MKLDNNDeconvolutionNode::createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate,
const mkldnn::memory::desc& wgh_candidate,
const mkldnn::memory::desc& out_candidate,
mkldnn::algorithm alg) const {
auto convertDims = [] (const std::vector<ptrdiff_t>& orig_dims) {
return memory::dims(orig_dims.begin(), orig_dims.end());
};
std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc;
conv_desc = std::make_shared<convolution_forward::desc>(prop_kind::forward_inference, alg,
out_candidate, wgh_candidate, in_candidate,
convertDims(stride),
convertDims(dilation),
convertDims(paddingL),
convertDims(paddingR));
std::shared_ptr<mkldnn::convolution_backward_data::desc> deconv_desc;
deconv_desc = std::make_shared<convolution_backward_data::desc>(alg, out_candidate, wgh_candidate,
in_candidate,
convertDims(stride),
convertDims(dilation),
convertDims(paddingL),
convertDims(paddingR));
auto fwd_conv_pd = std::make_shared<convolution_forward::primitive_desc>(*conv_desc, getEngine(), true);
return {deconv_desc, fwd_conv_pd};
}
MKLDNNDeconvolutionNode::Int8DeconvDesc MKLDNNDeconvolutionNode::createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate,
const mkldnn::memory::desc& wgh_candidate,
const mkldnn::memory::desc& out_candidate) const {
auto convertDims = [] (const std::vector<ptrdiff_t>& orig_dims) {
return memory::dims(orig_dims.begin(), orig_dims.end());
};
MKLDNNDeconvolutionNode::Int8DeconvDesc deconv_desc;
deconv_desc = std::make_shared<mkldnn::deconvolution_forward::desc>(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct,
in_candidate, wgh_candidate, out_candidate,
convertDims(stride), convertDims(dilation),
convertDims(paddingL), convertDims(paddingR));
return deconv_desc;
}
void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
const std::vector<MemoryDescPtr> &outputDesc) {
auto inDesc = inputDesc[0]->isDefined() ? inputDesc[0] : inputDesc[0]->cloneWithNewDims(inShape.getStaticDims());
auto dnnlInDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inDesc);
auto in_candidate = dnnlInDesc.getDnnlDesc();
auto outDesc = outputDesc[0];
if (!outDesc->isDefined()) {
const auto outShape = shapeInferInternal(inDesc->getShape().getStaticDims(), lastOutputSpatialDims);
outDesc = outDesc->cloneWithNewDims(outShape);
}
auto dnnlOutDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*outDesc);
auto out_candidate = dnnlOutDesc.getDnnlDesc();
// grouping and autoblocking are not compatible
if ((withGroups && !isDW) && (dnnlInDesc.blocksExtended() || dnnlOutDesc.blocksExtended()))
return;
if (isInt8) {
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), memory::data_type::s8, memory::format_tag::any);
std::shared_ptr<mkldnn::deconvolution_forward::desc> deconv_desc;
deconv_desc.reset(new deconvolution_forward::desc(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct,
in_candidate.getDnnlDesc(), wgh_candidate, out_candidate.getDnnlDesc(),
convertDims(stride), convertDims(dilation),
convertDims(paddingL), convertDims(paddingR)));
descs.emplace_back(deconv_desc);
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
descs.emplace_back(createDescriptorInternalInt8(in_candidate, wgh_candidate, out_candidate));
} else {
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), in_candidate.getDataType(), memory::format_tag::any);
mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(getWeightDims()),
dnnlInDesc.getDataType(), memory::format_tag::any);
for (auto alg : {mkldnn::algorithm::convolution_winograd, mkldnn::algorithm::convolution_direct}) {
std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc;
conv_desc.reset(new convolution_forward::desc(prop_kind::forward_inference, alg,
out_candidate.getDnnlDesc(), wgh_candidate, in_candidate.getDnnlDesc(),
convertDims(stride),
convertDims(dilation),
convertDims(paddingL),
convertDims(paddingR)));
std::shared_ptr<mkldnn::convolution_backward_data::desc> deconv_desc;
deconv_desc.reset(new convolution_backward_data::desc(alg, out_candidate.getDnnlDesc(), wgh_candidate,
in_candidate.getDnnlDesc(),
convertDims(stride),
convertDims(dilation),
convertDims(paddingL),
convertDims(paddingR)));
descs_fwd.push_back(conv_desc);
descs_bwd.push_back(deconv_desc);
auto fwd_conv_pd = std::make_shared<convolution_forward::primitive_desc>(*conv_desc, getEngine(), true);
std::shared_ptr<convolution_backward_data::desc> deconv_desc;
std::shared_ptr<convolution_forward::primitive_desc> fwd_conv_pd;
std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(in_candidate, wgh_candidate, out_candidate, alg);
if (fwd_conv_pd->get(true) == nullptr)
continue;
descs.emplace_back(deconv_desc, fwd_conv_pd);
}
}
@ -413,15 +725,25 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr>
std::shared_ptr<MemoryDesc> MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
if (idx == 2) {
return std::make_shared<CpuBlockedMemoryDesc>(getOriginalInputPrecisionAtPort(2), Shape(getInputShapeAtPort(2).getStaticDims()));
return std::make_shared<CpuBlockedMemoryDesc>(InferenceEngine::Precision::I32, Shape(getInputShapeAtPort(2).getStaticDims()));
} else if (idx > 0 && isInt8) {
// we need to store the 'weight' input as an edge,
// because at this moment we can't simply replace the internal blob with the input: the weight data must be kept as is, but in a different order
return std::make_shared<CpuBlockedMemoryDesc>(getOriginalInputPrecisionAtPort(idx), Shape(getInputShapeAtPort(idx).getStaticDims()));
}
auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : isInt8 ? primitive_desc_it.src_desc(idx) : primitive_desc_it.diff_dst_desc(idx);
if (getInputShapeAtPort(idx).isDynamic()) {
return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx));
}
return MKLDNNExtensionUtils::makeDescriptor(desc);
}
std::shared_ptr<MemoryDesc> MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
auto desc = isInt8 ? primitive_desc_it.dst_desc(idx) : primitive_desc_it.diff_src_desc(idx);
if (getOutputShapeAtPort(idx).isDynamic()) {
return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getOutputShapeAtPort(idx));
}
return MKLDNNExtensionUtils::makeDescriptor(desc);
}
@ -439,4 +761,61 @@ InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const
return getMaxPrecision(inputPrecisions);
}
MKLDNNDeconvolutionNode::DeconvExecutorDefault::DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
const mkldnn::engine& engine) {
execPrim.reset(new mkldnn::convolution_backward_data(pd));
if (inMemDesc != pd.diff_dst_desc()) {
inputReorders.insert({DNNL_ARG_DIFF_DST, IntermReorder(inMemDesc, pd.diff_dst_desc(), engine)});
}
if (weightMemDesc != pd.weights_desc()) {
inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, pd.weights_desc(), engine)});
}
if (outMemDesc != pd.diff_src_desc()) {
outputReorders.insert({DNNL_ARG_DIFF_SRC, IntermReorder(pd.diff_src_desc(), outMemDesc, engine)});
}
}
MKLDNNDeconvolutionNode::DeconvExecutorInt8::DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
const mkldnn::engine& engine) {
execPrim.reset(new mkldnn::deconvolution_forward(pd));
if (inMemDesc != pd.src_desc()) {
inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, pd.src_desc(), engine)});
}
if (weightMemDesc != pd.weights_desc()) {
inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, pd.weights_desc(), engine)});
}
if (outMemDesc != pd.dst_desc()) {
outputReorders.insert({DNNL_ARG_DST, IntermReorder(pd.dst_desc(), outMemDesc, engine)});
}
}
std::vector<int32_t> MKLDNNDeconvolutionNode::readOutputSpatialDims() const {
if (getParentEdges().size() < 3) {
IE_THROW() << "Can't get output spatial dims. Inputs number = " << getParentEdges().size();
}
const auto &shapeMemPtr = getParentEdgesAtPort(2)[0]->getMemoryPtr();
if (!shapeMemPtr || !shapeMemPtr->GetPrimitivePtr()) {
IE_THROW() << "'output_shape' input memory is not allocated.";
}
const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2;
if (shapeMemPtr->getStaticDims()[0] != spDimsNum) {
IE_THROW() << "Can't read output spatial dims, beause 'output_shape' input has incorrect number of elements";
}
const int32_t *outShapePtr = reinterpret_cast<const int32_t *>(shapeMemPtr->GetPtr());
std::vector<int32_t> outSpDims(outShapePtr, outShapePtr + shapeMemPtr->getStaticDims()[0]);
return outSpDims;
}
REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution);

View File

@ -9,10 +9,15 @@
#include <memory>
#include <string>
#include <vector>
#include "common/dnnl_executor.h"
namespace MKLDNNPlugin {
class MKLDNNDeconvolutionNode : public MKLDNNNode {
using DefaultDeconvDescs = std::pair<std::shared_ptr<mkldnn::convolution_backward_data::desc>,
std::shared_ptr<mkldnn::convolution_forward::primitive_desc>>;
using Int8DeconvDesc = std::shared_ptr<mkldnn::deconvolution_forward::desc>;
public:
MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
@ -39,27 +44,88 @@ public:
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
bool canFuse(const MKLDNNNodePtr& node) const override;
const InferenceEngine::SizeVector& getWeightDims() { return weightDims; }
const std::vector<ptrdiff_t>& getStride() { return stride; }
const VectorDims& getWeightDims() const { return getInputShapeAtPort(1).getStaticDims(); }
const std::vector<ptrdiff_t>& getStride() const { return stride; }
void prepareParams() override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
bool needShapeInfer() const override;
std::vector<VectorDims> shapeInfer() const override;
private:
using executorPtr = std::shared_ptr<DnnlExecutor>;
executorPtr execPtr = nullptr;
class DeconvExecutorDefault : public DnnlExecutor {
public:
DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
const mkldnn::engine& engine);
};
class DeconvExecutorInt8 : public DnnlExecutor {
public:
DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd,
const mkldnn::memory::desc& inMemDesc,
const mkldnn::memory::desc& weightMemDesc,
const mkldnn::memory::desc& outMemDesc,
const mkldnn::engine& engine);
};
bool withGroups = false;
bool isDW = false;
bool isInt8 = false;
bool autoPad = false;
bool externOutShape = false;
size_t groupNum = 1;
size_t IC;
size_t OC;
std::vector<ptrdiff_t> kernel;
std::vector<ptrdiff_t> stride;
std::vector<ptrdiff_t> dilation;
std::vector<ptrdiff_t> paddingL;
std::vector<ptrdiff_t> paddingR;
InferenceEngine::SizeVector weightDims;
std::vector<std::shared_ptr<mkldnn::convolution_forward::desc>> descs_fwd;
std::vector<std::shared_ptr<mkldnn::convolution_backward_data::desc>> descs_bwd;
ov::CoordinateDiff paddingL;
ov::CoordinateDiff paddingR;
ov::CoordinateDiff outputPadding;
std::vector<int32_t> lastOutputSpatialDims;
VectorDims int8WeightDims;
Shape inShape;
AttrPtr pAttr;
mkldnn::primitive_attr attr;
void setPostOps(mkldnn::primitive_attr &attr);
void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims);
VectorDims shapeInferInternal(const VectorDims &inDims, std::vector<int32_t> outSpDims) const;
void initPadding(std::shared_ptr<ngraph::Node> op, const Shape &inShape, const std::vector<int32_t>& outSpDims);
void initPaddingR(const Shape &inShape, const Shape &outShape);
std::vector<int32_t> readOutputSpatialDims() const;
std::pair<VectorDims, VectorDims> makeDummyInOutShape();
DefaultDeconvDescs createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate,
const mkldnn::memory::desc& wgh_candidate,
const mkldnn::memory::desc& out_candidate,
mkldnn::algorithm alg) const;
Int8DeconvDesc createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate,
const mkldnn::memory::desc& wgh_candidate,
const mkldnn::memory::desc& out_candidate) const;
std::shared_ptr<MKLDNNDescriptor> createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc,
bool isWinograd) const;
std::shared_ptr<MKLDNNDescriptor> createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
const mkldnn::memory::desc& wghDesc,
const mkldnn::memory::desc& dstDesc) const;
void createDeconvPrim(std::shared_ptr<MKLDNNDescriptor> desc,
MKLDNNMemoryPtr srcMemPtr,
MKLDNNMemoryPtr wghMemPtr,
MKLDNNMemoryPtr dstMemPtr,
AttrPtr attr,
impl_desc_type selectedImpl);
std::string errorPrefix;

View File

@ -49,15 +49,7 @@ bool MKLDNNDetectionOutputNode::isSupportedOperation(const std::shared_ptr<const
return true;
}
void MKLDNNDetectionOutputNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
MKLDNNDetectionOutputNode::MKLDNNDetectionOutputNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng,
MKLDNNDetectionOutputNode::MKLDNNDetectionOutputNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
@ -170,6 +162,10 @@ struct ConfidenceComparatorDO {
const float* confData;
};
void MKLDNNDetectionOutputNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) {
float *dstData = reinterpret_cast<float *>(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr());

View File

@ -16,7 +16,6 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
@ -24,7 +23,7 @@ public:
protected:
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
void executeDynamicImpl(mkldnn::stream strm) override;
private:
static const int ID_LOC = 0;

View File

@ -7,6 +7,7 @@
#include <ie_parallel.hpp>
#include <mkldnn_types.h>
#include "cpu_types.h"
#include "utils/bfloat16.hpp"
#include <cpu/x64/injectors/jit_uni_quantization_injector.hpp>
#include <cpu/ref_eltwise.hpp>
@ -31,6 +32,7 @@
#include "ngraph_transformations/op/leaky_relu.hpp"
#include "ngraph_transformations/op/swish_cpu.hpp"
#include <oneapi/dnnl/dnnl.hpp>
#include <string>
#include <vector>
#include <memory>
@ -791,18 +793,41 @@ private:
}
};
MKLDNNEltwiseNode::BroadcastingPolicy MKLDNNEltwiseNode::determineBroadcastingPolicy(const std::shared_ptr<ngraph::Node>& op) {
const auto const1 = std::dynamic_pointer_cast<ngraph::opset1::Constant>(op->get_input_node_shared_ptr(0));
const auto const2 = std::dynamic_pointer_cast<ngraph::opset1::Constant>(op->get_input_node_shared_ptr(1));
int constPort = -1;
if (const2) {
constPort = 1;
} else if (const1) {
constPort = 0;
} else {
return Undefined;
}
auto const_shape = op->get_input_shape(constPort);
if (ngraph::shape_size(const_shape) == 1)
return PerTensor;
else
return PerChannel;
}
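A minimal standalone sketch of the policy decision made above, assuming only that the constant input's total element count is what distinguishes the cases:

#include <cstddef>
#include <iostream>
#include <vector>

enum BroadcastingPolicy { PerChannel, PerTensor, Undefined };

// If neither input is a constant, the policy stays Undefined; a single-element constant
// means PerTensor broadcasting, and any larger constant is treated as PerChannel.
static BroadcastingPolicy policyFromConstShape(bool hasConstInput, const std::vector<size_t>& constShape) {
    if (!hasConstInput)
        return Undefined;
    size_t elements = 1;
    for (const auto d : constShape)
        elements *= d;
    return elements == 1 ? PerTensor : PerChannel;
}

int main() {
    std::cout << policyFromConstShape(true, {1}) << std::endl;           // 1 -> PerTensor (scalar constant)
    std::cout << policyFromConstShape(true, {1, 64, 1, 1}) << std::endl; // 0 -> PerChannel
    std::cout << policyFromConstShape(false, {}) << std::endl;           // 2 -> Undefined (no constant input)
    return 0;
}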
const std::map<const ngraph::DiscreteTypeInfo, MKLDNNEltwiseNode::Initializer> MKLDNNEltwiseNode::initializers = {
{ngraph::op::v1::Add::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseAdd;
node.broadcastingPolicy = determineBroadcastingPolicy(op);
}},
{ngraph::op::v1::Subtract::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseSubtract;
node.broadcastingPolicy = determineBroadcastingPolicy(op);
}},
{ngraph::op::v1::Multiply::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseMultiply;
node.broadcastingPolicy = determineBroadcastingPolicy(op);
}},
{ngraph::op::v1::Divide::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseDivide;
node.broadcastingPolicy = determineBroadcastingPolicy(op);
}},
{ngraph::op::v0::SquaredDifference::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseSquaredDifference;
@ -828,6 +853,7 @@ const std::map<const ngraph::DiscreteTypeInfo, MKLDNNEltwiseNode::Initializer> M
node.alpha = powerStatic->get_power();
node.beta = powerStatic->get_scale();
node.gamma = powerStatic->get_shift();
node.broadcastingPolicy = PerTensor;
}},
{ngraph::op::v1::Equal::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseEqual;
@ -954,6 +980,7 @@ const std::map<const ngraph::DiscreteTypeInfo, MKLDNNEltwiseNode::Initializer> M
}},
{ngraph::op::v0::PRelu::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwisePrelu;
node.broadcastingPolicy = determineBroadcastingPolicy(op);
}},
{ngraph::op::v0::Erf::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
node.algorithm = EltwiseErf;
@ -984,7 +1011,7 @@ bool MKLDNNEltwiseNode::isSupportedOperation(const std::shared_ptr<const ngraph:
}
MKLDNNEltwiseNode::MKLDNNEltwiseNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNNode(op, eng, cache) {
MKLDNNNode(op, eng, cache), broadcastingPolicy(Undefined) {
std::string errorMessage;
if (!isSupportedOperation(op, errorMessage)) {
IE_THROW(NotImplemented) << errorMessage;
@ -1498,14 +1525,6 @@ void MKLDNNEltwiseNode::selectOptimalPrimitiveDescriptor() {
selectPreferPrimitiveDescriptor(getPrimitivesPriority(), true);
}
void MKLDNNEltwiseNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
void MKLDNNEltwiseNode::initOptimalPrimitiveDescriptor() {
auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
@ -1656,6 +1675,10 @@ void MKLDNNEltwiseNode::executeReference(const jit_eltwise_params &jep, const ji
});
}
void MKLDNNEltwiseNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
void MKLDNNEltwiseNode::execute(mkldnn::stream strm) {
if (execPtr) {
jit_eltwise_call_args_ptrs args_ptrs = {};
@ -1713,106 +1736,124 @@ void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) {
getInputShapeAtPort(0) == getInputShapeAtPort(1);
if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) {
std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
if ((parentNode->getType() == FullyConnected || parentNode->getType() == MatMul) && one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract,
EltwiseMultiply, EltwiseDivide, EltwiseMulAdd, EltwisePowerStatic, EltwisePrelu)) {
std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
}
}
MKLDNNNode::fuseInto(parentNode);
}
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' ";
if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
switch (getMKLDNNAlgorithm()) {
case mkldnn::algorithm::eltwise_relu:
case mkldnn::algorithm::eltwise_tanh:
case mkldnn::algorithm::eltwise_elu:
case mkldnn::algorithm::eltwise_square:
case mkldnn::algorithm::eltwise_abs:
case mkldnn::algorithm::eltwise_sqrt:
case mkldnn::algorithm::eltwise_linear:
case mkldnn::algorithm::eltwise_bounded_relu:
case mkldnn::algorithm::eltwise_soft_relu:
case mkldnn::algorithm::eltwise_logistic:
case mkldnn::algorithm::eltwise_exp:
case mkldnn::algorithm::eltwise_gelu_erf:
case mkldnn::algorithm::eltwise_gelu_tanh:
case mkldnn::algorithm::eltwise_clip:
case mkldnn::algorithm::eltwise_swish:
case mkldnn::algorithm::eltwise_hardswish:
case mkldnn::algorithm::eltwise_mish:
case mkldnn::algorithm::eltwise_hsigmoid:
case mkldnn::algorithm::eltwise_round_half_to_even:
case mkldnn::algorithm::eltwise_round_half_away_from_zero:
ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta());
break;
default: IE_THROW() << errorPrefix << "as post operation is not supported";
case mkldnn::algorithm::eltwise_relu:
case mkldnn::algorithm::eltwise_tanh:
case mkldnn::algorithm::eltwise_elu:
case mkldnn::algorithm::eltwise_square:
case mkldnn::algorithm::eltwise_abs:
case mkldnn::algorithm::eltwise_sqrt:
case mkldnn::algorithm::eltwise_linear:
case mkldnn::algorithm::eltwise_bounded_relu:
case mkldnn::algorithm::eltwise_soft_relu:
case mkldnn::algorithm::eltwise_logistic:
case mkldnn::algorithm::eltwise_exp:
case mkldnn::algorithm::eltwise_gelu_erf:
case mkldnn::algorithm::eltwise_gelu_tanh:
case mkldnn::algorithm::eltwise_clip:
case mkldnn::algorithm::eltwise_swish:
case mkldnn::algorithm::eltwise_hardswish:
case mkldnn::algorithm::eltwise_mish:
case mkldnn::algorithm::eltwise_hsigmoid:
case mkldnn::algorithm::eltwise_round_half_to_even:
case mkldnn::algorithm::eltwise_round_half_away_from_zero:
ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta());
break;
default: IE_THROW() << errorPrefix << "as post operation is not supported";
}
} else {
const size_t chIdx = postOpDims.size() > 1 ? 1 : 0;
const size_t chIdx = postOpDims.size() > 1 ? getFusingAxis() : 0;
scalesBuffer = makeAlignedBuffer(postOpDims[chIdx], scales, align);
if (getAlgorithm() != EltwisePrelu) {
shiftsBuffer = makeAlignedBuffer(postOpDims[chIdx], shifts, align);
}
if (initAsBinary) {
auto appendBinary = [&](const mkldnn::algorithm alg, MKLDNNMemoryPtr &memPtr, const std::vector<float> &data) {
if (data.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
std::vector<size_t> binaryDims(postOpDims.size(), 1);
binaryDims[chIdx] = postOpDims[chIdx];
DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryDims));
ops.append_binary(alg, memoryDesc.getDnnlDesc());
if (initBinaryMemory) {
memPtr.reset(new MKLDNNMemory(getEngine()));
memPtr->Create(memoryDesc, &data[0]);
}
};
switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
break;
case EltwiseMultiply:
case EltwiseDivide:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
break;
case EltwiseMulAdd:
case EltwisePowerStatic:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
break;
case EltwisePrelu:
appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scalesBuffer);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
}
} else {
switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
case EltwiseMultiply:
case EltwiseDivide:
case EltwiseMulAdd:
case EltwisePowerStatic:
if (scalesBuffer.empty() || shiftsBuffer.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scalesBuffer[0], &shiftsBuffer[0]);
break;
case EltwisePrelu:
if (scalesBuffer.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scalesBuffer[0], nullptr);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
}
/* @todo legacy depthwise post ops are kept for now
* for performance reasons
*/
switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
case EltwiseMultiply:
case EltwiseDivide:
case EltwiseMulAdd:
case EltwisePowerStatic:
if (scales.empty() || shifts.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scalesBuffer[0], &shiftsBuffer[0]);
break;
case EltwisePrelu:
if (scales.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scalesBuffer[0], nullptr);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
}
}
}
void MKLDNNEltwiseNode::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) {
const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' as binary post op ";
VectorDims broadcastBinaryShape(postOpDims.size(), 1);
auto appendBinary = [&](const mkldnn::algorithm alg, MKLDNNMemoryPtr &memPtr, const std::vector<float> &data) {
if (data.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
if (broadcastingPolicy == Undefined)
IE_THROW() << errorPrefix << "cannot be performed since policy is Undefined";
DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, broadcastingPolicy == PerTensor ? Shape(broadcastBinaryShape) : Shape(postOpDims));
ops.append_binary(alg, memoryDesc.getDnnlDesc());
if (!memPtr) {
memPtr.reset(new MKLDNNMemory(getEngine()));
memPtr->Create(memoryDesc, &data[0]);
binaryPostOpsMem.push_back(memPtr);
}
};
switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
break;
case EltwiseDivide:
case EltwiseMultiply:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
break;
case EltwiseMulAdd:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
break;
case EltwisePowerStatic:
if (beta != 1.0f) // Multiply only if there is a scale
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
if (gamma != 0.0f) // Add only if there is a shift
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
break;
case EltwisePrelu:
appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scales);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
}
}
bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const {
auto isSuitableNode = [this](const MKLDNNEltwiseNode* node) {
// [WA] Since execution precision change from I32 to FP32 for Divide operation may lead to incorrect results

View File

@ -75,7 +75,8 @@ public:
bool created() const override;
bool canBeInPlace() const override;
bool canFuse(const MKLDNNNodePtr& node) const override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false) override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1) override;
void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) override;
void fuseInto(MKLDNNNodePtr& parentNode) override;
InferenceEngine::Precision getRuntimePrecision() const override;
@ -89,16 +90,23 @@ public:
bool isWithBroadcast();
bool isSpecialConvolutionAddFusing() const { return specialConvolutionAddFusing; }
void createPrimitive() override;
std::vector<VectorDims> shapeInfer() const override;
bool needPrepareParams() const override;
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
void executeDynamicImpl(mkldnn::stream strm) override;
enum BroadcastingPolicy {
PerChannel,
PerTensor,
Undefined,
};
BroadcastingPolicy getBroadcastingPolicy() const { return broadcastingPolicy; }
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
private:
struct EltwiseExecutor {
EltwiseExecutor(size_t batch) : batchDimIdx(batch) {}
@ -130,6 +138,8 @@ private:
size_t fullWorkAmount = 0;
};
BroadcastingPolicy broadcastingPolicy;
mkldnn::algorithm mkldnnAlgorithm = mkldnn::algorithm::undef;
static const int optimalTensorRank = 6;
@ -157,6 +167,8 @@ private:
using Initializer = std::function<void(const std::shared_ptr<ngraph::Node>&, MKLDNNEltwiseNode& node)>;
static const std::map<const ngraph::DiscreteTypeInfo, Initializer> initializers;
static BroadcastingPolicy determineBroadcastingPolicy(const std::shared_ptr<ngraph::Node>& op);
void executeOptimized6D(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
const VectorDims &dims_out) const;
void executeOptimizedGeneric(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,

View File

@ -70,14 +70,6 @@ void MKLDNNEmbeddingBagOffsetSumNode::initSupportedPrimitiveDescriptors() {
addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any);
}
void MKLDNNEmbeddingBagOffsetSumNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
void MKLDNNEmbeddingBagOffsetSumNode::prepareParams() {
_indicesLen = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[0];
_offsetsLen = getParentEdgesAtPort(OFFSETS_IDX)[0]->getMemory().getStaticDims()[0];
@ -126,6 +118,14 @@ void MKLDNNEmbeddingBagOffsetSumNode::getIndices(int embIndex, const int*& indic
weightsIdx = offsetsData_[embIndex];
}
void MKLDNNEmbeddingBagOffsetSumNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNEmbeddingBagOffsetSumNode::isExecutable() const {
return !isInputTensorAtPortEmpty(0);
}
void MKLDNNEmbeddingBagOffsetSumNode::execute(mkldnn::stream strm) {
const auto *srcData = reinterpret_cast<const uint8_t *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
auto *dstData = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());

View File

@ -19,15 +19,15 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
bool isExecutable() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
protected:
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
void executeDynamicImpl(mkldnn::stream strm) override;
private:
void initFromInputs() override;

View File

@ -64,14 +64,6 @@ void MKLDNNEmbeddingBagPackedSumNode::initSupportedPrimitiveDescriptors() {
addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any);
}
void MKLDNNEmbeddingBagPackedSumNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
void MKLDNNEmbeddingBagPackedSumNode::prepareParams() {
_batch = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[0];
_indicesPerBag = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[1];
@ -94,6 +86,14 @@ void MKLDNNEmbeddingBagPackedSumNode::getIndices(int embIndex, const int*& indic
weightsIdx = embIndex * _indicesPerBag;
}
void MKLDNNEmbeddingBagPackedSumNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNEmbeddingBagPackedSumNode::isExecutable() const {
return !isInputTensorAtPortEmpty(0);
}
void MKLDNNEmbeddingBagPackedSumNode::execute(mkldnn::stream strm) {
const auto *srcData = reinterpret_cast<const uint8_t *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
auto *dstData = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());

View File

@ -19,15 +19,15 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
bool isExecutable() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
protected:
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
void executeDynamicImpl(mkldnn::stream strm) override;
private:
void initFromInputs() override;

View File

@ -11,14 +11,6 @@
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
void MKLDNNEmbeddingSegmentsSumNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
}
bool MKLDNNEmbeddingSegmentsSumNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
const auto embBagSegSumOp = ngraph::as_type_ptr<const ngraph::op::v3::EmbeddingSegmentsSum>(op);
@ -129,6 +121,14 @@ void MKLDNNEmbeddingSegmentsSumNode::getIndices(int embIndex, const int*& indice
}
}
void MKLDNNEmbeddingSegmentsSumNode::executeDynamicImpl(mkldnn::stream strm) {
execute(strm);
}
bool MKLDNNEmbeddingSegmentsSumNode::isExecutable() const {
return !isInputTensorAtPortEmpty(0);
}
void MKLDNNEmbeddingSegmentsSumNode::execute(mkldnn::stream strm) {
const auto *srcData = reinterpret_cast<const uint8_t *>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
auto *dstData = reinterpret_cast<uint8_t *>(getChildEdgeAt(0)->getMemoryPtr()->GetPtr());

View File

@ -19,15 +19,15 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
bool isExecutable() const override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
protected:
void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
void executeDynamicImpl(mkldnn::stream strm) override;
private:
void initFromInputs() override;

View File

@ -272,12 +272,6 @@ void MKLDNNExperimentalDetectronDetectionOutputNode::initSupportedPrimitiveDescr
impl_desc_type::ref_any);
}
void MKLDNNExperimentalDetectronDetectionOutputNode::createPrimitive() {
if (inputShapesDefined()) {
updateLastInputDims();
}
}
void MKLDNNExperimentalDetectronDetectionOutputNode::execute(mkldnn::stream strm) {
const int rois_num = getParentEdgeAt(INPUT_ROIS)->getMemory().getStaticDims()[0];
assert(classes_num_ == static_cast<int>(getParentEdgeAt(INPUT_SCORES)->getMemory().getStaticDims()[1]));

View File

@ -15,7 +15,6 @@ public:
void getSupportedDescriptors() override {};
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;

Some files were not shown because too many files have changed in this diff